From: kaf24@scramble.cl.cam.ac.uk Date: Thu, 10 Jun 2004 16:59:06 +0000 (+0000) Subject: bitkeeper revision 1.952 (40c8935a3XSRdQfnx5RoO7XgaggvOQ) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~18169 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22man:///%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22man:/?a=commitdiff_plain;h=7f68576b77ef3c9fde1009100690ff996d4490e0;p=xen.git bitkeeper revision 1.952 (40c8935a3XSRdQfnx5RoO7XgaggvOQ) Towards x86_64 support. Merged a bunch of the existing x86_64 stuff back into a generic 'x86' architecture. Aim is to share as much as possible between 32- and 64-bit worlds. --- diff --git a/.rootkeys b/.rootkeys index ef162ef6ca..556f2de38c 100644 --- a/.rootkeys +++ b/.rootkeys @@ -228,43 +228,40 @@ 3f72f1bdJPsV3JCnBqs9ddL9tr6D2g xen/COPYING 3ddb79bcbOVHh38VJzc97-JEGD4dJQ xen/Makefile 3ddb79bcWnTwYsQRWl_PaneJfa6p0w xen/Rules.mk -3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/i386/Makefile -3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/i386/Rules.mk -3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/i386/acpi.c -3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/i386/apic.c -3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/i386/boot/boot.S -3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/i386/delay.c -3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/i386/domain_page.c -3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/i386/entry.S -3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/i386/extable.c -3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/i386/flushtlb.c -3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen/arch/i386/i387.c -3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen/arch/i386/i8259.c -3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/i386/idle0_task.c -3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/i386/io_apic.c -3ddb79bc1uNlAtc-84Ioq4qfcnI_CQ xen/arch/i386/ioremap.c -3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/i386/irq.c -3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/i386/mm.c -3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/i386/mpparse.c -3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/i386/nmi.c 
-3ddb79bcnL-_Dtsbtjgxl7vJU3vBiQ xen/arch/i386/pci-dma.c -3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/i386/pci-i386.c -3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/i386/pci-i386.h -3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/i386/pci-irq.c -3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/i386/pci-pc.c -40a4dfced2dnSzbKgJFlD3chKHexjQ xen/arch/i386/pdb-linux.c -4022a73czgX7d-2zfF_cb33oVemApQ xen/arch/i386/pdb-stub.c -3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/i386/process.c -3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/i386/rwlock.c -3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/i386/setup.c -3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen/arch/i386/smp.c -3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen/arch/i386/smpboot.c -3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/i386/time.c -3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/i386/trampoline.S -3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/i386/traps.c -3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/i386/usercopy.c -3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/i386/xen.lds -404f1b91uzXgPOtIhs8UZPGbZvlHfg xen/arch/x86_64/Rules.mk +3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile +3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk +3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c +3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c +3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/x86/boot/boot.S +3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c +3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/domain_page.c +3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/entry.S +3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c +3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c +3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen/arch/x86/i387.c +3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen/arch/x86/i8259.c +3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/x86/idle0_task.c +3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/x86/io_apic.c +3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/x86/irq.c +3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/mm.c +3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/x86/mpparse.c +3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c 
+3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c +3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/x86/pci-pc.c +3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/x86/pci-x86.c +3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h +40a4dfced2dnSzbKgJFlD3chKHexjQ xen/arch/x86/pdb-linux.c +4022a73czgX7d-2zfF_cb33oVemApQ xen/arch/x86/pdb-stub.c +3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/process.c +3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c +3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c +3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen/arch/x86/smp.c +3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen/arch/x86/smpboot.c +3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/x86/time.c +3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S +3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c +3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/usercopy.c +3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/xen.lds 3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile 3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c 4022a73c_BbDFd2YJ_NQYVvKX5Oz7w xen/common/debug-linux.c @@ -347,103 +344,60 @@ 40715b2dKRW7A71SNaeV6zfrEzYxPw xen/include/acpi/platform/acenv.h 40715b2d8fYydJMcODFrV1ocLklGDg xen/include/acpi/platform/acgcc.h 40715b2d1yZkqyAt0kgx2xEwsatuuA xen/include/acpi/platform/aclinux.h -40715b2dWe0tDhx9LkLXzTQkvD49RA xen/include/asm-i386/acpi.h -3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen/include/asm-i386/apic.h -3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen/include/asm-i386/apicdef.h -3ddb79c3OiG9eTsi9Dy3F_OkuRAzKA xen/include/asm-i386/atomic.h -3ddb79c3rM-Ote0Xn6Ytg8Y6YqAG-A xen/include/asm-i386/bitops.h -3ddb79c3KhTI0F_Iw_hRL9QEyOVK-g xen/include/asm-i386/cache.h -404f1b920OQVnrbnXnySS-WxrH9Wzw xen/include/asm-i386/config.h -3ddb79c2LLt11EQHjrd6sB7FUqvFfA xen/include/asm-i386/cpufeature.h -3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen/include/asm-i386/current.h -3ddb79c2jFkPAZTDmU35L6IUssYMgQ xen/include/asm-i386/debugreg.h -3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/include/asm-i386/delay.h -3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen/include/asm-i386/desc.h 
-40715b2dTokMLYGSuD58BnxOqyWVew xen/include/asm-i386/div64.h -3e564149UkU91RX7onzpCAmbj_IFjw xen/include/asm-i386/dma.h -3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-i386/domain_page.h -3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-i386/fixmap.h -3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-i386/flushtlb.h -3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-i386/hardirq.h -3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen/include/asm-i386/hdreg.h -3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/include/asm-i386/i387.h -3ddb79c3otbjpnqFDSzSeD0J-0xcwg xen/include/asm-i386/ide.h -3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-i386/io.h -3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-i386/io_apic.h -3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-i386/irq.h -404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-i386/ldt.h -3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-i386/mc146818rtc.h -3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-i386/mpspec.h -3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-i386/msr.h -3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-i386/page.h -3e450943kzme29HPCtq5HNOVQkddfw xen/include/asm-i386/param.h -3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-i386/pci.h -4022a73diKn2Ax4-R4gzk59lm1YdDg xen/include/asm-i386/pdb.h -3ddb79c3nm2zdzeO6Mj8g7ex3txgGw xen/include/asm-i386/pgalloc.h -3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-i386/processor.h -3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-i386/ptrace.h -3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-i386/rwlock.h -3ddb79c2mJI9YuGMScjofPlD8EdtgA xen/include/asm-i386/scatterlist.h -3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen/include/asm-i386/smp.h -3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen/include/asm-i386/smpboot.h -3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen/include/asm-i386/softirq.h -3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen/include/asm-i386/spinlock.h -3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-i386/string.h -3ddb79c3ezddh34MdelJpa5tNR00Dw xen/include/asm-i386/system.h -3e397e66xPNc8eaSqC9pPbyAtRGzHA xen/include/asm-i386/time.h 
-3e450943TfE-iovQIY_tMO_VdGsPhA xen/include/asm-i386/timex.h -3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen/include/asm-i386/types.h -3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen/include/asm-i386/uaccess.h -3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen/include/asm-i386/unaligned.h -404f1b95z0B0jb2IfvZJ7uvmYqsqpg xen/include/asm-x86_64/apic.h -404f1b95_OZH-rw_durHSa_Kgdo95A xen/include/asm-x86_64/apicdef.h -404f1b967UWSPkB0cwT9v-rilNzkHw xen/include/asm-x86_64/atomic.h -404f1b97UDomt73PizniyrCaxVRkXQ xen/include/asm-x86_64/bitops.h -404f1b99W-dMUlFpsvt--tVpQvNgEQ xen/include/asm-x86_64/cache.h -404f1b9b_phpQlRnyiWqP6RodfZDpg xen/include/asm-x86_64/config.h -404f1b9cz7UV611DK6CTY1ZAiwGtTw xen/include/asm-x86_64/cpufeature.h -404f1b9ceJeGVaPNIENm2FkK0AgEOQ xen/include/asm-x86_64/current.h -404f1b9d854xae6HKv-9W8lLSgROdQ xen/include/asm-x86_64/debugreg.h -404f1b9eRm9rtcM29P5O2nrPFOGSow xen/include/asm-x86_64/delay.h -404f1b9fl6AQ_a-T1TDK3fuwTPXmHw xen/include/asm-x86_64/desc.h -404f1ba05mjpUREtosjzz3PPL5cTJA xen/include/asm-x86_64/dma.h -404f1ba13mnjeZT2ytPm0DB63703nA xen/include/asm-x86_64/domain_page.h -404f1ba31i0gS-cdqvd0RZX1HVnxsA xen/include/asm-x86_64/fixmap.h -404f1ba4KXQ_V7HOkenF04KRU7Tl7w xen/include/asm-x86_64/flushtlb.h -404f1ba5Sqzc22eXORShvCF9-rpMbA xen/include/asm-x86_64/hardirq.h -404f1ba6_nDjomU9HJVvUugj63LvEg xen/include/asm-x86_64/hdreg.h -404f1ba7Q-lF892SDZLWjJ62wmauSA xen/include/asm-x86_64/i387.h -404f1ba8yxfnHH0NWC1B-wmd6bK2wg xen/include/asm-x86_64/ide.h -404f1ba9_7NIylhSRmokesN8TNIiNg xen/include/asm-x86_64/io.h -404f1baaiXXy7vChbzKmluSyJ5LWIw xen/include/asm-x86_64/io_apic.h -404f1baceMqjaYFs7oZoNsPkaZJ0WQ xen/include/asm-x86_64/irq.h -404f1badfXZJZ2sU8sh9PS2EZvd19Q xen/include/asm-x86_64/ldt.h -404f1bae_yI5vMg-_k4EySMERbbz2Q xen/include/asm-x86_64/mc146818rtc.h -404f1bafYfNwntXQGIggyj7D6YruJQ xen/include/asm-x86_64/mpspec.h -404f1bb0asrts1dyLQhyARCgzhL0NA xen/include/asm-x86_64/msr.h -404f1bb1LSCqrMDSfRAti5NdMQPJBQ xen/include/asm-x86_64/page.h 
-404f1bb2IUaGWD82SrQFaacyBixVFw xen/include/asm-x86_64/param.h -404f1bb3zSQfhMuQ24xNtq9Ed09jGw xen/include/asm-x86_64/pci.h -404f1bb41Yl-5ZjIWnG66HDCj6OIWA xen/include/asm-x86_64/pda.h -404f1bb5toGAnZVAlJ2fWWMv28DFJQ xen/include/asm-x86_64/pdb.h -404f1bb6pz982jtehZacFKhFUac0ug xen/include/asm-x86_64/pgalloc.h -404f1bb756fZfxk5HDx7J7BW3R-1jQ xen/include/asm-x86_64/processor.h -404f1bb86rAXB3aLS1vYdcqpJiEcyg xen/include/asm-x86_64/ptrace.h -404f1bb9K0pcyDrV4Ctva1HUczoueQ xen/include/asm-x86_64/rwlock.h -404f1bbaIdS7vc3sE032fQG6EnY8AQ xen/include/asm-x86_64/scatterlist.h -404f1bbbR5n83SiPof3joEPv9xWPPA xen/include/asm-x86_64/smp.h -404f1bbc67CEECfR8ATd7dPD1ajLng xen/include/asm-x86_64/smpboot.h -404f1bbdXaaPrIp5AUIjC8Hsp2H0Aw xen/include/asm-x86_64/softirq.h -404f1bbeomkO5YarnkIRWxVhlB5EJA xen/include/asm-x86_64/spinlock.h -404f1bbf82VK-kyDVBmR7CTvtTBKaw xen/include/asm-x86_64/string.h -404f1bc0laOnGpDxFpgdiuZpEyOOKw xen/include/asm-x86_64/system.h -404f1bc1FnfxOhmgWYHP97TPqA40Pw xen/include/asm-x86_64/time.h -404f1bc2mx9ZbazcdFh-AN70ZvNMJQ xen/include/asm-x86_64/timex.h -404f1bc3R2o0PIpQme8bDWeHcqHNGw xen/include/asm-x86_64/types.h -404f1bc4tWkB9Qr8RkKtZGW5eMQzhw xen/include/asm-x86_64/uaccess.h -404f1bc5idyWKKROGo_hvHVx58Gmkw xen/include/asm-x86_64/unaligned.h +40715b2dWe0tDhx9LkLXzTQkvD49RA xen/include/asm-x86/acpi.h +3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen/include/asm-x86/apic.h +3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen/include/asm-x86/apicdef.h +3ddb79c3OiG9eTsi9Dy3F_OkuRAzKA xen/include/asm-x86/atomic.h +3ddb79c3rM-Ote0Xn6Ytg8Y6YqAG-A xen/include/asm-x86/bitops.h +3ddb79c3KhTI0F_Iw_hRL9QEyOVK-g xen/include/asm-x86/cache.h +404f1b920OQVnrbnXnySS-WxrH9Wzw xen/include/asm-x86/config.h +3ddb79c2LLt11EQHjrd6sB7FUqvFfA xen/include/asm-x86/cpufeature.h +3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen/include/asm-x86/current.h +3ddb79c2jFkPAZTDmU35L6IUssYMgQ xen/include/asm-x86/debugreg.h +3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/include/asm-x86/delay.h +3ddb79c34BFiXjBJ_cCKB0aCsV1IDw 
xen/include/asm-x86/desc.h +40715b2dTokMLYGSuD58BnxOqyWVew xen/include/asm-x86/div64.h +3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/domain_page.h +3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-x86/fixmap.h +3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-x86/flushtlb.h +3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-x86/hardirq.h +3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/include/asm-x86/i387.h +3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h +3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h +3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h +404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h +3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h +3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h +3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h +3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h +3e450943kzme29HPCtq5HNOVQkddfw xen/include/asm-x86/param.h +3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h +4022a73diKn2Ax4-R4gzk59lm1YdDg xen/include/asm-x86/pdb.h +3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h +3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/ptrace.h +3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-x86/rwlock.h +3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen/include/asm-x86/smp.h +3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen/include/asm-x86/smpboot.h +3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen/include/asm-x86/softirq.h +3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen/include/asm-x86/spinlock.h +3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/string.h +3ddb79c3ezddh34MdelJpa5tNR00Dw xen/include/asm-x86/system.h +3e397e66xPNc8eaSqC9pPbyAtRGzHA xen/include/asm-x86/time.h +3e450943TfE-iovQIY_tMO_VdGsPhA xen/include/asm-x86/timex.h +3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen/include/asm-x86/types.h +3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen/include/asm-x86/uaccess.h +3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen/include/asm-x86/unaligned.h +404f1b9b_phpQlRnyiWqP6RodfZDpg xen/include/asm-x86/x86_64/config.h 
+404f1b9ceJeGVaPNIENm2FkK0AgEOQ xen/include/asm-x86/x86_64/current.h +404f1b9fl6AQ_a-T1TDK3fuwTPXmHw xen/include/asm-x86/x86_64/desc.h +404f1badfXZJZ2sU8sh9PS2EZvd19Q xen/include/asm-x86/x86_64/ldt.h +404f1bb1LSCqrMDSfRAti5NdMQPJBQ xen/include/asm-x86/x86_64/page.h +404f1bb41Yl-5ZjIWnG66HDCj6OIWA xen/include/asm-x86/x86_64/pda.h +404f1bb756fZfxk5HDx7J7BW3R-1jQ xen/include/asm-x86/x86_64/processor.h +404f1bb86rAXB3aLS1vYdcqpJiEcyg xen/include/asm-x86/x86_64/ptrace.h +404f1bc4tWkB9Qr8RkKtZGW5eMQzhw xen/include/asm-x86/x86_64/uaccess.h 400304fcmRQmDdFYEzDh0wcBba9alg xen/include/hypervisor-ifs/COPYING -404f1bc68SXxmv0zQpXBWGrCzSyp8w xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h +404f1bc68SXxmv0zQpXBWGrCzSyp8w xen/include/hypervisor-ifs/arch-x86/hypervisor-if.h 404f1bc7IwU-qnH8mJeVu0YsNGMrcw xen/include/hypervisor-ifs/arch-x86_64/hypervisor-if.h 3ddb79c2PMeWTK86y4C3F4MzHw4A1g xen/include/hypervisor-ifs/dom0_ops.h 403cd194j2pyLqXD8FJ-ukvZzkPenw xen/include/hypervisor-ifs/event_channel.h diff --git a/xen/Rules.mk b/xen/Rules.mk index ffd18003ab..6073e113c2 100644 --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -4,8 +4,14 @@ debugger ?= n perfc ?= n trace ?= n -COMPILE_ARCH := $(shell uname -m | sed -e s/i.86/i386/) -TARGET_ARCH ?= $(COMPILE_ARCH) +# Currently supported architectures: +# {COMPILE,TARGET}_ARCH := x86 +# {COMPILE,TARGET}_SUBARCH := x86_32 | x86_64 +COMPILE_ARCH := x86 +COMPILE_SUBARCH := $(shell uname -m | sed -e s/i.86/x86_32/) + +TARGET_ARCH ?= $(COMPILE_ARCH) +TARGET_SUBARCH ?= $(COMPILE_SUBARCH) TARGET := $(BASEDIR)/xen HDRS := $(wildcard $(BASEDIR)/include/xen/*.h) diff --git a/xen/arch/i386/Makefile b/xen/arch/i386/Makefile deleted file mode 100644 index 8257f479ad..0000000000 --- a/xen/arch/i386/Makefile +++ /dev/null @@ -1,23 +0,0 @@ - -include $(BASEDIR)/Rules.mk - -ifneq ($(debugger),y) -OBJS := $(subst pdb-linux.o,,$(OBJS)) -OBJS := $(subst pdb-stub.o,,$(OBJS)) -endif - -# What happens here? 
We link monitor object files together, starting -# at MONITOR_BASE (a very high address). But bootloader cannot put -# things there, so we initially load at LOAD_BASE. A hacky little -# tool called `elf-reloc' is used to modify segment offsets from -# MONITOR_BASE-relative to LOAD_BASE-relative. -# (NB. Linux gets round this by turning its image into raw binary, then -# wrapping that with a low-memory bootstrapper.) -default: boot/boot.o $(OBJS) - $(LD) -r -o arch.o $(OBJS) - $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET).dbg - objcopy -R .note -R .comment -S $(TARGET).dbg $(TARGET) - $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET) - -clean: - rm -f *.o *~ core boot/*.o boot/*~ boot/core diff --git a/xen/arch/i386/Rules.mk b/xen/arch/i386/Rules.mk deleted file mode 100644 index b2a30bffee..0000000000 --- a/xen/arch/i386/Rules.mk +++ /dev/null @@ -1,19 +0,0 @@ -######################################## -# x86-specific definitions - -CC := gcc -LD := ld -# Linker should relocate monitor to this address -MONITOR_BASE := 0xFC500000 -# Bootloader should load monitor to this real address -LOAD_BASE := 0x00100000 -CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -O3 -CFLAGS += -iwithprefix include -Wall -Werror -DMONITOR_BASE=$(MONITOR_BASE) -CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -CFLAGS += -Wno-pointer-arith -Wredundant-decls -m32 -TARGET_CPU := i686 -CFLAGS += -march=$(TARGET_CPU) -LDARCHFLAGS := --oformat elf32-i386 -LDFLAGS := -T xen.lds -N - - diff --git a/xen/arch/i386/acpi.c b/xen/arch/i386/acpi.c deleted file mode 100644 index ea621b7cea..0000000000 --- a/xen/arch/i386/acpi.c +++ /dev/null @@ -1,676 +0,0 @@ -/* - * acpi.c - Architecture-Specific Low-Level ACPI Support - * - * Copyright (C) 2001, 2002 Paul Diefenbaugh - * Copyright (C) 2001 Jun Nakajima - * Copyright (C) 2001 Patrick Mochel - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is 
free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include -#include -#include -#include -/*#include */ -#include -#include -/*#include */ -#include -#include -#include -#include -#include -#include -#include -/*#include */ -#include -#include -#include -/*#include */ -#include - - -#define PREFIX "ACPI: " - -int acpi_lapic = 0; -int acpi_ioapic = 0; - -/* -------------------------------------------------------------------------- - Boot-time Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_BOOT -int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ -int acpi_ht __initdata = 1; /* enable HT */ - -enum acpi_irq_model_id acpi_irq_model; - - -/* - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, - * to map the target physical address. The problem is that set_fixmap() - * provides a single page, and it is possible that the page is not - * sufficient. - * By using this area, we can map up to MAX_IO_APICS pages temporarily, - * i.e. until the next __va_range() call. - * - * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* - * from the fixed base. 
That's why we start at FIX_IO_APIC_BASE_END and - * count idx down while incrementing the phys address. - */ -char *__acpi_map_table(unsigned long phys, unsigned long size) -{ - unsigned long base, offset, mapped_size; - int idx; - - if (phys + size < 8*1024*1024) - return __va(phys); - - offset = phys & (PAGE_SIZE - 1); - mapped_size = PAGE_SIZE - offset; - set_fixmap(FIX_ACPI_END, phys); - base = fix_to_virt(FIX_ACPI_END); - - /* - * Most cases can be covered by the below. - */ - idx = FIX_ACPI_END; - while (mapped_size < size) { - if (--idx < FIX_ACPI_BEGIN) - return 0; /* cannot handle this */ - phys += PAGE_SIZE; - set_fixmap(idx, phys); - mapped_size += PAGE_SIZE; - } - - return ((unsigned char *) base + offset); -} - - -#ifdef CONFIG_X86_LOCAL_APIC - -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; - - -static int __init -acpi_parse_madt ( - unsigned long phys_addr, - unsigned long size) -{ - struct acpi_table_madt *madt = NULL; - - if (!phys_addr || !size) - return -EINVAL; - - madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); - if (!madt) { - printk(KERN_WARNING PREFIX "Unable to map MADT\n"); - return -ENODEV; - } - - if (madt->lapic_address) - acpi_lapic_addr = (u64) madt->lapic_address; - - printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n", - madt->lapic_address); - - detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id); - - return 0; -} - - -static int __init -acpi_parse_lapic ( - acpi_table_entry_header *header) -{ - struct acpi_table_lapic *processor = NULL; - - processor = (struct acpi_table_lapic*) header; - if (!processor) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_register_lapic ( - processor->id, /* APIC ID */ - processor->flags.enabled); /* Enabled? 
*/ - - return 0; -} - - -static int __init -acpi_parse_lapic_addr_ovr ( - acpi_table_entry_header *header) -{ - struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; - - lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; - if (!lapic_addr_ovr) - return -EINVAL; - - acpi_lapic_addr = lapic_addr_ovr->address; - - return 0; -} - -static int __init -acpi_parse_lapic_nmi ( - acpi_table_entry_header *header) -{ - struct acpi_table_lapic_nmi *lapic_nmi = NULL; - - lapic_nmi = (struct acpi_table_lapic_nmi*) header; - if (!lapic_nmi) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (lapic_nmi->lint != 1) - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); - - return 0; -} - -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) - -static int __init -acpi_parse_ioapic ( - acpi_table_entry_header *header) -{ - struct acpi_table_ioapic *ioapic = NULL; - - ioapic = (struct acpi_table_ioapic*) header; - if (!ioapic) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_register_ioapic ( - ioapic->id, - ioapic->address, - ioapic->global_irq_base); - - return 0; -} - - -static int __init -acpi_parse_int_src_ovr ( - acpi_table_entry_header *header) -{ - struct acpi_table_int_src_ovr *intsrc = NULL; - - intsrc = (struct acpi_table_int_src_ovr*) header; - if (!intsrc) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_override_legacy_irq ( - intsrc->bus_irq, - intsrc->flags.polarity, - intsrc->flags.trigger, - intsrc->global_irq); - - return 0; -} - - -static int __init -acpi_parse_nmi_src ( - acpi_table_entry_header *header) -{ - struct acpi_table_nmi_src *nmi_src = NULL; - - nmi_src = (struct acpi_table_nmi_src*) header; - if (!nmi_src) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* TBD: Support nimsrc entries? 
*/ - - return 0; -} - -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ - - -static unsigned long __init -acpi_scan_rsdp ( - unsigned long start, - unsigned long length) -{ - unsigned long offset = 0; - unsigned long sig_len = sizeof("RSD PTR ") - 1; - - /* - * Scan all 16-byte boundaries of the physical memory region for the - * RSDP signature. - */ - for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) - continue; - return (start + offset); - } - - return 0; -} - - -unsigned long __init -acpi_find_rsdp (void) -{ - unsigned long rsdp_phys = 0; - - /* - * Scan memory looking for the RSDP signature. First search EBDA (low - * memory) paragraphs and then search upper memory (E0000-FFFFF). - */ - rsdp_phys = acpi_scan_rsdp (0, 0x400); - if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); - - return rsdp_phys; -} - - -/* - * acpi_boot_init() - * called from setup_arch(), always. - * 1. maps ACPI tables for later use - * 2. enumerates lapics - * 3. enumerates io-apics - * - * side effects: - * acpi_lapic = 1 if LAPIC found - * acpi_ioapic = 1 if IOAPIC found - * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; - * if acpi_blacklisted() acpi_disabled = 1; - * acpi_irq_model=... - * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure - */ -int __init -acpi_boot_init (void) -{ - int result = 0; - - if (acpi_disabled && !acpi_ht) - return(1); - - /* - * The default interrupt routing model is PIC (8259). This gets - * overriden if IOAPICs are enumerated (below). - */ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; - - /* - * Initialize the ACPI boot-time table parser. 
- */ - result = acpi_table_init(); - if (result) { - acpi_disabled = 1; - return result; - } - - result = acpi_blacklisted(); - if (result) { - printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n"); - acpi_disabled = 1; - return result; - } - -#ifdef CONFIG_X86_LOCAL_APIC - - /* - * MADT - * ---- - * Parse the Multiple APIC Description Table (MADT), if exists. - * Note that this table provides platform SMP configuration - * information -- the successor to MPS tables. - */ - - result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); - if (!result) { - return 0; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing MADT\n"); - return result; - } - else if (result > 1) - printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n"); - - /* - * Local APIC - * ---------- - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). - */ - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); - return result; - } - - mp_register_lapic_address(acpi_lapic_addr); - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic); - if (!result) { - printk(KERN_ERR PREFIX "No LAPIC entries present\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return -ENODEV; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - acpi_lapic = 1; - -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) - - /* - * I/O APIC - * -------- - */ - - /* - * ACPI interpreter is 
required to complete interrupt setup, - * so if it is off, don't enumerate the io-apics with ACPI. - * If MPS is present, it will handle them, - * otherwise the system will stay in PIC mode - */ - if (acpi_disabled || acpi_noirq) { - return 1; - } - - /* - * if "noapic" boot option, don't look for IO-APICs - */ - if (ioapic_setup_disabled()) { - printk(KERN_INFO PREFIX "Skipping IOAPIC probe " - "due to 'noapic' option.\n"); - return 1; - } - - - result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic); - if (!result) { - printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); - return -ENODEV; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); - return result; - } - - /* Build a default routing table for legacy (ISA) interrupts. */ - mp_config_acpi_legacy_irqs(); - - result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - - acpi_irq_balance_set(NULL); - - acpi_ioapic = 1; - - if (acpi_lapic && acpi_ioapic) - smp_found_config = 1; - -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ - - return 0; -} - -#endif /*CONFIG_ACPI_BOOT*/ - -#ifdef CONFIG_ACPI_BUS -/* - * "acpi_pic_sci=level" (current default) - * programs the PIC-mode SCI to Level Trigger. 
- * (NO-OP if the BIOS set Level Trigger already) - * - * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's - * it may require Edge Trigger -- use "acpi_pic_sci=edge" - * (NO-OP if the BIOS set Edge Trigger already) - * - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers - * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) - */ - -static __initdata int acpi_pic_sci_trigger; /* 0: level, 1: edge */ - -void __init -acpi_pic_sci_set_trigger(unsigned int irq) -{ - unsigned char mask = 1 << (irq & 7); - unsigned int port = 0x4d0 + (irq >> 3); - unsigned char val = inb(port); - - - printk(PREFIX "IRQ%d SCI:", irq); - if (!(val & mask)) { - printk(" Edge"); - - if (!acpi_pic_sci_trigger) { - printk(" set to Level"); - outb(val | mask, port); - } - } else { - printk(" Level"); - - if (acpi_pic_sci_trigger) { - printk(" set to Edge"); - outb(val | mask, port); - } - } - printk(" Trigger.\n"); -} - -int __init -acpi_pic_sci_setup(char *str) -{ - while (str && *str) { - if (strncmp(str, "level", 5) == 0) - acpi_pic_sci_trigger = 0; /* force level trigger */ - if (strncmp(str, "edge", 4) == 0) - acpi_pic_sci_trigger = 1; /* force edge trigger */ - str = strchr(str, ','); - if (str) - str += strspn(str, ", \t"); - } - return 1; -} - -__setup("acpi_pic_sci=", acpi_pic_sci_setup); - -#endif /* CONFIG_ACPI_BUS */ - - - -/* -------------------------------------------------------------------------- - Low-Level Sleep Support - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_SLEEP - -#define DEBUG - -#ifdef DEBUG -#include -#endif - -/* address in low memory of the wakeup routine. 
*/ -unsigned long acpi_wakeup_address = 0; - -/* new page directory that we will be using */ -static pmd_t *pmd; - -/* saved page directory */ -static pmd_t saved_pmd; - -/* page which we'll use for the new page directory */ -static pte_t *ptep; - -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); - -/* - * acpi_create_identity_pmd - * - * Create a new, identity mapped pmd. - * - * Do this by creating new page directory, and marking all the pages as R/W - * Then set it as the new Page Middle Directory. - * And, of course, flush the TLB so it takes effect. - * - * We save the address of the old one, for later restoration. - */ -static void acpi_create_identity_pmd (void) -{ - pgd_t *pgd; - int i; - - ptep = (pte_t*)__get_free_page(GFP_KERNEL); - - /* fill page with low mapping */ - for (i = 0; i < PTRS_PER_PTE; i++) - set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED)); - - pgd = pgd_offset(current->active_mm, 0); - pmd = pmd_alloc(current->mm,pgd, 0); - - /* save the old pmd */ - saved_pmd = *pmd; - - /* set the new one */ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep))); - - /* flush the TLB */ - local_flush_tlb(); -} - -/* - * acpi_restore_pmd - * - * Restore the old pmd saved by acpi_create_identity_pmd and - * free the page that said function alloc'd - */ -static void acpi_restore_pmd (void) -{ - set_pmd(pmd, saved_pmd); - local_flush_tlb(); - free_page((unsigned long)ptep); -} - -/** - * acpi_save_state_mem - save kernel state - * - * Create an identity mapped page table and copy the wakeup routine to - * low memory. 
- */ -int acpi_save_state_mem (void) -{ - acpi_create_identity_pmd(); - acpi_copy_wakeup_routine(acpi_wakeup_address); - - return 0; -} - -/** - * acpi_save_state_disk - save kernel state to disk - * - */ -int acpi_save_state_disk (void) -{ - return 1; -} - -/* - * acpi_restore_state - */ -void acpi_restore_state_mem (void) -{ - acpi_restore_pmd(); -} - -/** - * acpi_reserve_bootmem - do _very_ early ACPI initialisation - * - * We allocate a page in low memory for the wakeup - * routine for when we come back from a sleep state. The - * runtime allocator allows specification of <16M pages, but not - * <1M pages. - */ -void __init acpi_reserve_bootmem(void) -{ - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); - printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address); -} - -void do_suspend_lowlevel_s4bios(int resume) -{ - if (!resume) { - save_processor_context(); - acpi_save_register_state((unsigned long)&&acpi_sleep_done); - acpi_enter_sleep_state_s4bios(); - return; - } -acpi_sleep_done: - restore_processor_context(); -} - - -#endif /*CONFIG_ACPI_SLEEP*/ - diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c deleted file mode 100644 index c44e430197..0000000000 --- a/xen/arch/i386/apic.c +++ /dev/null @@ -1,830 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Using APIC to generate smp_local_timer_interrupt? 
*/ -int using_apic_timer = 0; - -static int enabled_via_apicbase; - -int get_maxlvt(void) -{ - unsigned int v, ver, maxlvt; - - v = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(v); - /* 82489DXs do not report # of LVT entries. */ - maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; - return maxlvt; -} - -void clear_local_APIC(void) -{ - int maxlvt; - unsigned long v; - - maxlvt = get_maxlvt(); - - /* - * Masking an LVT entry on a P6 can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* - * Clean APIC state for other OSs: - */ - apic_write_around(APIC_LVTT, APIC_LVT_MASKED); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - apic_write_around(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); - v = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_INTEGRATED(v)) { /* !82489DX */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -void __init connect_bsp_APIC(void) -{ - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. - * connect BSP's local APIC to INT and NMI lines. 
- */ - printk("leaving PIC mode, enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -} - -void disconnect_bsp_APIC(void) -{ - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect - * only on certain older boards). Note that APIC - * interrupts, including IPIs, won't work beyond - * this point! The only exception are INIT IPIs. - */ - printk("disabling APIC mode, entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - } -} - -void disable_local_APIC(void) -{ - unsigned long value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write_around(APIC_SPIV, value); - - if (enabled_via_apicbase) { - unsigned int l, h; - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - Dprintk("Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - Dprintk("Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. 
- */ - reg0 = apic_read(APIC_ID); - Dprintk("Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - Dprintk("Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - Dprintk("Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - Dprintk("Getting LVT1: %x\n", reg1); - - return 1; -} - -void __init sync_Arb_IDs(void) -{ - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - Dprintk("Synchronizing Arb IDs.\n"); - apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); -} - -extern void __error_in_apic_c (void); - -/* - * WAS: An initial setup of the virtual wire mode. - * NOW: We don't bother doing anything. All we need at this point - * is to receive timer ticks, so that 'jiffies' is incremented. - * If we're SMP, then we can assume BIOS did setup for us. - * If we're UP, then the APIC should be disabled (it is at reset). - * If we're UP and APIC is enabled, then BIOS is clever and has - * probably done initial interrupt routing for us. - */ -void __init init_bsp_APIC(void) -{ -} - -static unsigned long calculate_ldr(unsigned long old) -{ - unsigned long id = 1UL << smp_processor_id(); - return (old & ~APIC_LDR_MASK)|SET_APIC_LOGICAL_ID(id); -} - -void __init setup_local_APIC (void) -{ - unsigned long value, ver, maxlvt; - - value = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(value); - - if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) - __error_in_apic_c(); - - /* Double-check wether this APIC is really registered. */ - if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map)) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. 
"AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - - /* - * In clustered apic mode, the firmware does this for us - * Put the APIC into flat delivery mode. - * Must be "all ones" explicitly for 82489DX. - */ - apic_write_around(APIC_DFR, APIC_DFR_FLAT); - - /* - * Set up the logical destination ID. - */ - value = apic_read(APIC_LDR); - apic_write_around(APIC_LDR, calculate_ldr(value)); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write_around(APIC_TASKPRI, value); - - /* - * Now that we are all set up, enable the APIC - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - /* - * Enable APIC - */ - value |= APIC_SPIV_APIC_ENABLED; - - /* Enable focus processor (bit==0) */ - value &= ~APIC_SPIV_FOCUS_DISABLED; - - /* Set spurious IRQ vector */ - value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); - - /* - * Set up LVT0, LVT1: - * - * set up through-local-APIC on the BP's LINT0. This is not - * strictly necessery in pure symmetric-IO mode, but sometimes - * we delegate interrupts to the 8259A. - */ - /* - * TODO: set up through-local-APIC from through-I/O-APIC? --macro - */ - value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!smp_processor_id()) { - value = APIC_DM_EXTINT; - printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); - } else { - value = APIC_DM_EXTINT | APIC_LVT_MASKED; - printk("masked ExtINT on CPU#%d\n", smp_processor_id()); - } - apic_write_around(APIC_LVT0, value); - - /* - * only the BP should see the LINT1 NMI signal, obviously. 
- */ - if (!smp_processor_id()) - value = APIC_DM_NMI; - else - value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); - - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - maxlvt = get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - printk("ESR value before enabling vector: %08lx\n", value); - - value = ERROR_APIC_VECTOR; /* enables sending errors */ - apic_write_around(APIC_LVTERR, value); - /* spec says clear errors after enabling vector. */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - printk("ESR value after enabling vector: %08lx\n", value); - } else { - printk("No ESR for 82489DX.\n"); - } - - if ( (smp_processor_id() == 0) && (nmi_watchdog == NMI_LOCAL_APIC) ) - setup_apic_nmi_watchdog(); -} - - -static inline void apic_pm_init1(void) { } -static inline void apic_pm_init2(void) { } - - -/* - * Detect and enable local APICs on non-SMP boards. - * Original code written by Keir Fraser. - */ - -static int __init detect_init_APIC (void) -{ - u32 h, l, features; - extern void get_cpu_vendor(struct cpuinfo_x86*); - - /* Workaround for us being called before identify_cpu(). */ - get_cpu_vendor(&boot_cpu_data); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) - break; - if (boot_cpu_data.x86 == 15 && cpu_has_apic) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || - (boot_cpu_data.x86 == 15 && cpu_has_apic) || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Some BIOSes disable the local APIC in the - * APIC_BASE MSR. This can only be done in - * software for Intel P6 and AMD K7 (Model > 1). 
- */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - - /* The APIC feature bit should now be enabled in `cpuid' */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk("Could not enable APIC!\n"); - return -1; - } - - set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_physical_apicid = 0; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - if (nmi_watchdog != NMI_NONE) - nmi_watchdog = NMI_LOCAL_APIC; - - printk("Found and enabled local APIC!\n"); - apic_pm_init1(); - return 0; - - no_apic: - printk("No local APIC present or hardware disabled\n"); - return -1; -} - -void __init init_apic_mappings(void) -{ - unsigned long apic_phys = 0; - - /* - * If no local APIC can be found then set up a fake all zeroes page to - * simulate the local APIC and another one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = get_free_page(GFP_KERNEL); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). 
- */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - -#ifdef CONFIG_X86_IO_APIC - { - unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - set_fixmap_nocache(idx, ioapic_phys); - Dprintk("mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } - } -#endif -} - -/***************************************************************************** - * APIC calibration - * - * The APIC is programmed in bus cycles. - * Timeout values should specified in real time units. - * The "cheapest" time source is the cyclecounter. - * - * Thus, we need a mappings from: bus cycles <- cycle counter <- system time - * - * The calibration is currently a bit shoddy since it requires the external - * timer chip to generate periodic timer interupts. - *****************************************************************************/ - -/* used for system time scaling */ -static unsigned int bus_freq; -static u32 bus_cycle; /* length of one bus cycle in pico-seconds */ -static u32 bus_scale; /* scaling factor convert ns to bus cycles */ - -/* - * The timer chip is already set up at HZ interrupts per second here, - * but we do not accept timer interrupts yet. We only allow the BP - * to calibrate. 
- */ -static unsigned int __init get_8254_timer_count(void) -{ - /*extern spinlock_t i8253_lock;*/ - /*unsigned long flags;*/ - unsigned int count; - /*spin_lock_irqsave(&i8253_lock, flags);*/ - outb_p(0x00, 0x43); - count = inb_p(0x40); - count |= inb_p(0x40) << 8; - /*spin_unlock_irqrestore(&i8253_lock, flags);*/ - return count; -} - -void __init wait_8254_wraparound(void) -{ - unsigned int curr_count, prev_count=~0; - int delta; - curr_count = get_8254_timer_count(); - do { - prev_count = curr_count; - curr_count = get_8254_timer_count(); - delta = curr_count-prev_count; - /* - * This limit for delta seems arbitrary, but it isn't, it's slightly - * above the level of error a buggy Mercury/Neptune chipset timer can - * cause. - */ - } while (delta < 300); -} - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function with a very large value and read the current time after - * a well defined period of time as expired. - * - * Calibration is only performed once, for CPU0! - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -#define APIC_DIVISOR 1 -static void __setup_APIC_LVTT(unsigned int clocks) -{ - unsigned int lvtt1_value, tmp_value; - lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR; - apic_write_around(APIC_LVTT, lvtt1_value); - tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1)); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); -} - -/* - * this is done for every CPU from setup_APIC_clocks() below. - * We setup each local APIC with a zero timeout value for now. - * Unlike Linux, we don't have to wait for slices etc. 
- */ -void setup_APIC_timer(void * data) -{ - unsigned long flags; - __save_flags(flags); - __sti(); - __setup_APIC_LVTT(0); - __restore_flags(flags); -} - -/* - * In this function we calibrate APIC bus clocks to the external timer. - * - * As a result we have the Bys Speed and CPU speed in Hz. - * - * We want to do the calibration only once (for CPU0). CPUs connected by the - * same APIC bus have the very same bus frequency. - * - * This bit is a bit shoddy since we use the very same periodic timer interrupt - * we try to eliminate to calibrate the APIC. - */ - -int __init calibrate_APIC_clock(void) -{ - unsigned long long t1 = 0, t2 = 0; - long tt1, tt2; - long result; - int i; - const int LOOPS = HZ/10; - - printk("Calibrating APIC timer for CPU%d...\n", smp_processor_id()); - - /* Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. */ - __setup_APIC_LVTT(1000000000); - - /* The timer chip counts down to zero. Let's wait - * for a wraparound to start exact measurement: - * (the current tick might have been already half done) */ - wait_8254_wraparound(); - - /* We wrapped around just now. Let's start: */ - rdtscll(t1); - tt1 = apic_read(APIC_TMCCT); - - /* Let's wait LOOPS wraprounds: */ - for (i = 0; i < LOOPS; i++) - wait_8254_wraparound(); - - tt2 = apic_read(APIC_TMCCT); - rdtscll(t2); - - /* The APIC bus clock counter is 32 bits only, it - * might have overflown, but note that we use signed - * longs, thus no extra care needed. - * underflown to be exact, as the timer counts down ;) */ - result = (tt1-tt2)*APIC_DIVISOR/LOOPS; - - printk("..... CPU speed is %ld.%04ld MHz.\n", - ((long)(t2-t1)/LOOPS) / (1000000/HZ), - ((long)(t2-t1)/LOOPS) % (1000000/HZ)); - - printk("..... Bus speed is %ld.%04ld MHz.\n", - result / (1000000/HZ), - result % (1000000/HZ)); - - /* - * KAF: Moved this to time.c where it's calculated relative to the TSC. 
- * Therefore works on machines with no local APIC. - */ - /*cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);*/ - - /* set up multipliers for accurate timer code */ - bus_freq = result*HZ; - bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */ - bus_scale = (1000*262144)/bus_cycle; - - printk("..... bus_scale = 0x%08X\n", bus_scale); - /* reset APIC to zero timeout value */ - __setup_APIC_LVTT(0); - return result; -} - -/* - * initialise the APIC timers for all CPUs - * we start with the first and find out processor frequency and bus speed - */ -void __init setup_APIC_clocks (void) -{ - printk("Using local APIC timer interrupts.\n"); - using_apic_timer = 1; - __cli(); - /* calibrate CPU0 for CPU speed and BUS speed */ - bus_freq = calibrate_APIC_clock(); - /* Now set up the timer for real. */ - setup_APIC_timer((void *)bus_freq); - __sti(); - /* and update all other cpus */ - smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1); -} - -#undef APIC_DIVISOR - -/* - * reprogram the APIC timer. Timeoutvalue is in ns from start of boot - * returns 1 on success - * returns 0 if the timeout value is too small or in the past. - */ -int reprogram_ac_timer(s_time_t timeout) -{ - s_time_t now; - s_time_t expire; - u64 apic_tmict; - - /* - * We use this value because we don't trust zero (we think it may just - * cause an immediate interrupt). At least this is guaranteed to hold it - * off for ages (esp. since the clock ticks on bus clock, not cpu clock!). - */ - if ( timeout == 0 ) - { - apic_tmict = 0xffffffff; - goto reprogram; - } - - now = NOW(); - expire = timeout - now; /* value from now */ - - if ( expire <= 0 ) - { - Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n", - smp_processor_id(), (u32)(now>>32), - (u32)now, (u32)(timeout>>32),(u32)timeout); - return 0; - } - - /* - * If we don't have local APIC then we just poll the timer list off the - * PIT interrupt. Cheesy but good enough to work on eg. 
VMware :-) - */ - if ( !cpu_has_apic ) - return 1; - - /* conversion to bus units */ - apic_tmict = (((u64)bus_scale) * expire)>>18; - - if ( apic_tmict >= 0xffffffff ) - { - Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id()); - apic_tmict = 0xffffffff; - } - - if ( apic_tmict == 0 ) - { - Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id()); - return 0; - } - - reprogram: - /* Program the timer. */ - apic_write(APIC_TMICT, (unsigned long)apic_tmict); - - return 1; -} - -unsigned int apic_timer_irqs [NR_CPUS]; - -void smp_apic_timer_interrupt(struct pt_regs * regs) -{ - int cpu = smp_processor_id(); - - ack_APIC_irq(); - - apic_timer_irqs[cpu]++; - perfc_incrc(apic_timer); - - __cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ); -} - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void smp_spurious_interrupt(void) -{ - unsigned long v; - - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk("spurious APIC interrupt on CPU#%d, should never happen.\n", - smp_processor_id()); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ - -asmlinkage void smp_error_interrupt(void) -{ - unsigned long v, v1; - - /* First tickle the hardware, only then report what went on. 
-- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk ("APIC error on CPU%d: %02lx(%02lx)\n", - smp_processor_id(), v , v1); -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int __init APIC_init_uniprocessor (void) -{ - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) - { - printk("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - return -1; - } - - verify_local_APIC(); - - connect_bsp_APIC(); - -#ifdef CONFIG_SMP - cpu_online_map = 1; -#endif - phys_cpu_present_map = 1; - apic_write_around(APIC_ID, boot_cpu_physical_apicid); - - apic_pm_init2(); - - setup_local_APIC(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && nr_ioapics) - setup_IO_APIC(); -#endif - setup_APIC_clocks(); - - return 0; -} diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S deleted file mode 100644 index ebb74c6562..0000000000 --- a/xen/arch/i386/boot/boot.S +++ /dev/null @@ -1,249 +0,0 @@ -#include -#include -#include - -#define SECONDARY_CPU_FLAG 0xA5A5A5A5 - - .text - -ENTRY(start) - jmp hal_entry - - .align 4 - -/*** MULTIBOOT HEADER ****/ - /* Magic number indicating a Multiboot header. */ - .long 0x1BADB002 - /* Flags to bootloader (see Multiboot spec). */ - .long 0x00000002 - /* Checksum: must be the negated sum of the first two fields. */ - .long -0x1BADB004 - -hal_entry: - /* Set up a few descriptors: on entry only CS is guaranteed good. 
*/ - lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET - mov $(__HYPERVISOR_DS),%ecx - mov %ecx,%ds - mov %ecx,%es - mov %ecx,%fs - mov %ecx,%gs - ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET -1: lss stack_start-__PAGE_OFFSET,%esp - - /* Reset EFLAGS (subsumes CLI and CLD). */ - pushl $0 - popf - - /* CPU type checks. We need P6+. */ - mov $0x200000,%edx - pushfl - pop %ecx - and %edx,%ecx - jne bad_cpu # ID bit should be clear - pushl %edx - popfl - pushfl - pop %ecx - and %edx,%ecx - je bad_cpu # ID bit should be set - - /* Set up CR0. */ - mov %cr0,%ecx - and $0x00000011,%ecx # save ET and PE - or $0x00050022,%ecx # set AM, WP, NE and MP - mov %ecx,%cr0 - - /* Set up FPU. */ - fninit - - /* Set up CR4, except global flag which Intel requires should be */ - /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */ - mov %cr4,%ecx - or mmu_cr4_features-__PAGE_OFFSET,%ecx - mov %ecx,mmu_cr4_features-__PAGE_OFFSET - and $0x7f,%ecx /* disable GLOBAL bit */ - mov %ecx,%cr4 - -#ifdef CONFIG_SMP - /* Is this a non-boot processor? */ - cmp $(SECONDARY_CPU_FLAG),%ebx - jne continue_boot_cpu - - call start_paging - lidt idt_descr - jmp start_secondary -#endif - -continue_boot_cpu: - add $__PAGE_OFFSET,%ebx - push %ebx /* Multiboot info struct */ - push %eax /* Multiboot magic value */ - - /* Initialize BSS (no nasty surprises!) 
*/ - mov $__bss_start-__PAGE_OFFSET,%edi - mov $_end-__PAGE_OFFSET,%ecx - sub %edi,%ecx - xor %eax,%eax - rep stosb - - /* Copy all modules (dom0 + initrd if present) out of the Xen heap */ - mov (%esp),%eax - cmp $0x2BADB002,%eax - jne skip_dom0_copy - sub $__PAGE_OFFSET,%ebx /* turn back into a phys addr */ - mov 0x14(%ebx),%edi /* mbi->mods_count */ - dec %edi /* mbi->mods_count-- */ - jb skip_dom0_copy /* skip if no modules */ - mov 0x18(%ebx),%eax /* mbi->mods_addr */ - mov (%eax),%ebx /* %ebx = mod[0]->mod_start */ - shl $4,%edi - add %edi,%eax - mov 0x4(%eax),%eax /* %eax = mod[mod_count-1]->end */ - mov %eax,%ecx - sub %ebx,%ecx /* %ecx = byte len of all mods */ - mov $(MAX_DIRECTMAP_ADDRESS), %edi - add %ecx, %edi /* %edi = src + length */ - shr $2,%ecx /* %ecx = length/4 */ -1: sub $4,%eax /* %eax = src, %edi = dst */ - sub $4,%edi - mov (%eax),%ebx - mov %ebx,(%edi) - loop 1b -skip_dom0_copy: - - /* Initialize low and high mappings of all memory with 4MB pages */ - mov $idle_pg_table-__PAGE_OFFSET,%edi - mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */ -1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */ - stosl /* low mapping */ - add $(1<physical mapping table. Ring 0 can access all memory. 
*/ -ENTRY(gdt_table) - .fill FIRST_RESERVED_GDT_ENTRY,8,0 - .quad 0x0000000000000000 /* unused */ - .quad 0x00cf9a000000ffff /* 0x0808 ring 0 4.00GB code at 0x0 */ - .quad 0x00cf92000000ffff /* 0x0810 ring 0 4.00GB data at 0x0 */ - .quad 0x00cfba000000c3ff /* 0x0819 ring 1 3.95GB code at 0x0 */ - .quad 0x00cfb2000000c3ff /* 0x0821 ring 1 3.95GB data at 0x0 */ - .quad 0x00cffa000000c3ff /* 0x082b ring 3 3.95GB code at 0x0 */ - .quad 0x00cff2000000c3ff /* 0x0833 ring 3 3.95GB data at 0x0 */ - .quad 0x0000000000000000 /* unused */ - .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ - - .org 0x1000 -ENTRY(idle_pg_table) # Initial page directory is 4kB - .org 0x2000 -ENTRY(cpu0_stack) # Initial stack is 8kB - .org 0x4000 -ENTRY(stext) -ENTRY(_stext) diff --git a/xen/arch/i386/delay.c b/xen/arch/i386/delay.c deleted file mode 100644 index cde5e18b5f..0000000000 --- a/xen/arch/i386/delay.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Precise Delay Loops for i386 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. - */ - -#include -#include -#include -#include - -void __udelay(unsigned long usecs) -{ - unsigned long ticks = usecs * ticks_per_usec; - unsigned long s, e; - - rdtscl(s); - do - { - rep_nop(); - rdtscl(e); - } while ((e-s) < ticks); -} diff --git a/xen/arch/i386/domain_page.c b/xen/arch/i386/domain_page.c deleted file mode 100644 index d276979a71..0000000000 --- a/xen/arch/i386/domain_page.c +++ /dev/null @@ -1,81 +0,0 @@ -/****************************************************************************** - * domain_page.h - * - * Allow temporary mapping of domain pages. Based on ideas from the - * Linux PKMAP code -- the copyrights and credits are retained below. 
- */ - -/* - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de - * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de * - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include - -unsigned long *mapcache; -static unsigned int map_idx, shadow_map_idx[NR_CPUS]; -static spinlock_t map_lock = SPIN_LOCK_UNLOCKED; - -/* Use a spare PTE bit to mark entries ready for recycling. */ -#define READY_FOR_TLB_FLUSH (1<<10) - -static void flush_all_ready_maps(void) -{ - unsigned long *cache = mapcache; - - /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */ - do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; } - while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 ); - - perfc_incrc(domain_page_tlb_flush); - local_flush_tlb(); -} - - -void *map_domain_mem(unsigned long pa) -{ - unsigned long va; - unsigned int idx, cpu = smp_processor_id(); - unsigned long *cache = mapcache; - unsigned long flags; - - perfc_incrc(map_domain_mem_count); - - spin_lock_irqsave(&map_lock, flags); - - /* Has some other CPU caused a wrap? We must flush if so. 
*/ - if ( map_idx < shadow_map_idx[cpu] ) - { - perfc_incrc(domain_page_tlb_flush); - local_flush_tlb(); - } - - for ( ; ; ) - { - idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1); - if ( idx == 0 ) flush_all_ready_maps(); - if ( cache[idx] == 0 ) break; - } - - cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR; - - spin_unlock_irqrestore(&map_lock, flags); - - shadow_map_idx[cpu] = idx; - - va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK); - return (void *)va; -} - -void unmap_domain_mem(void *va) -{ - unsigned int idx; - idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT; - mapcache[idx] |= READY_FOR_TLB_FLUSH; -} diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S deleted file mode 100644 index f710ba51ad..0000000000 --- a/xen/arch/i386/entry.S +++ /dev/null @@ -1,736 +0,0 @@ -/* - * linux/arch/i386/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * Stack layout in 'ret_from_system_call': - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - %fs - * 28(%esp) - %gs - * 2C(%esp) - orig_eax - * 30(%esp) - %eip - * 34(%esp) - %cs - * 38(%esp) - %eflags - * 3C(%esp) - %oldesp - * 40(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. - */ -/* The idea for callbacks from monitor -> guest OS. - * - * First, we require that all callbacks (either via a supplied - * interrupt-descriptor-table, or via the special event or failsafe callbacks - * in the shared-info-structure) are to ring 1. This just makes life easier, - * in that it means we don't have to do messy GDT/LDT lookups to find - * out which the privilege-level of the return code-selector. 
That code - * would just be a hassle to write, and would need to account for running - * off the end of the GDT/LDT, for example. For all callbacks we check - * that the provided - * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as - * don't allow a guest OS to install ring-0 privileges into the GDT/LDT. - * It's up to the guest OS to ensure all returns via the IDT are to ring 1. - * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather - * than the correct ring) and bad things are bound to ensue -- IRET is - * likely to fault, and we may end up killing the domain (no harm can - * come to the hypervisor itself, though). - * - * When doing a callback, we check if the return CS is in ring 0. If so, - * callback is delayed until next return to ring != 0. - * If return CS is in ring 1, then we create a callback frame - * starting at return SS/ESP. The base of the frame does an intra-privilege - * interrupt-return. - * If return CS is in ring > 1, we create a callback frame starting - * at SS/ESP taken from appropriate section of the current TSS. The base - * of the frame does an inter-privilege interrupt-return. - * - * Note that the "failsafe callback" uses a special stackframe: - * { return_DS, return_ES, return_FS, return_GS, return_EIP, - * return_CS, return_EFLAGS[, return_ESP, return_SS] } - * That is, original values for DS/ES/FS/GS are placed on stack rather than - * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them - * saved/restored in guest OS. Furthermore, if we load them we may cause - * a fault if they are invalid, which is a hassle to deal with. We avoid - * that problem if we don't load them :-) This property allows us to use - * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS - * on return to ring != 0, we can simply package it up as a return via - * the failsafe callback, and let the guest OS sort it out (perhaps by - * killing an application process). 
Note that we also do this for any - * faulting IRET -- just let the guest OS handle it via the event - * callback. - * - * We terminate a domain in the following cases: - * - creating a callback stack frame (due to bad ring-1 stack). - * - faulting IRET on entry to failsafe callback handler. - * So, each domain must keep its ring-1 %ss/%esp and failsafe callback - * handler in good order (absolutely no faults allowed!). - */ - -#include -#include -#include - -EBX = 0x00 -ECX = 0x04 -EDX = 0x08 -ESI = 0x0C -EDI = 0x10 -EBP = 0x14 -EAX = 0x18 -DS = 0x1C -ES = 0x20 -FS = 0x24 -GS = 0x28 -ORIG_EAX = 0x2C -EIP = 0x30 -CS = 0x34 -EFLAGS = 0x38 -OLDESP = 0x3C -OLDSS = 0x40 - -/* Offsets in task_struct */ -PROCESSOR = 0 -HYP_EVENTS = 2 -SHARED_INFO = 4 -EVENT_SEL = 8 -EVENT_ADDR = 12 -FAILSAFE_BUFFER = 16 -FAILSAFE_SEL = 32 -FAILSAFE_ADDR = 36 - -/* Offsets in shared_info_t */ -#define UPCALL_PENDING /* 0 */ -#define UPCALL_MASK 1 - -/* Offsets in guest_trap_bounce */ -GTB_ERROR_CODE = 0 -GTB_CR2 = 4 -GTB_FLAGS = 8 -GTB_CS = 10 -GTB_EIP = 12 -GTBF_TRAP = 1 -GTBF_TRAP_NOCODE = 2 -GTBF_TRAP_CR2 = 4 - -CF_MASK = 0x00000001 -IF_MASK = 0x00000200 -NT_MASK = 0x00004000 - - - -#define SAVE_ALL_NOSEGREGS \ - cld; \ - pushl %gs; \ - pushl %fs; \ - pushl %es; \ - pushl %ds; \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - -#define SAVE_ALL \ - SAVE_ALL_NOSEGREGS \ - movl $(__HYPERVISOR_DS),%edx; \ - movl %edx,%ds; \ - movl %edx,%es; \ - movl %edx,%fs; \ - movl %edx,%gs; \ - sti; - -#define GET_CURRENT(reg) \ - movl $4096-4, reg; \ - orl %esp, reg; \ - andl $~3,reg; \ - movl (reg),reg; - -ENTRY(continue_nonidle_task) - GET_CURRENT(%ebx) - jmp test_all_events - - ALIGN -/* - * HYPERVISOR_multicall(call_list, nr_calls) - * Execute a list of 'nr_calls' system calls, pointed at by 'call_list'. - * This is fairly easy except that: - * 1. We may fault reading the call list, and must patch that up; and - * 2. 
We cannot recursively call HYPERVISOR_multicall, or a malicious - * caller could cause our stack to blow up. - */ -do_multicall: - popl %eax - cmpl $SYMBOL_NAME(multicall_return_from_call),%eax - je multicall_return_from_call - pushl %ebx - movl 4(%esp),%ebx /* EBX == call_list */ - movl 8(%esp),%ecx /* ECX == nr_calls */ -multicall_loop: - pushl %ecx -multicall_fault1: - pushl 20(%ebx) # args[4] -multicall_fault2: - pushl 16(%ebx) # args[3] -multicall_fault3: - pushl 12(%ebx) # args[2] -multicall_fault4: - pushl 8(%ebx) # args[1] -multicall_fault5: - pushl 4(%ebx) # args[0] -multicall_fault6: - movl (%ebx),%eax # op - andl $255,%eax - call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) -multicall_return_from_call: -multicall_fault7: - movl %eax,24(%ebx) # args[5] == result - addl $20,%esp - popl %ecx - addl $(ARGS_PER_MULTICALL_ENTRY*4),%ebx - loop multicall_loop - popl %ebx - xorl %eax,%eax - jmp ret_from_hypervisor_call - -.section __ex_table,"a" - .align 4 - .long multicall_fault1, multicall_fixup1 - .long multicall_fault2, multicall_fixup2 - .long multicall_fault3, multicall_fixup3 - .long multicall_fault4, multicall_fixup4 - .long multicall_fault5, multicall_fixup5 - .long multicall_fault6, multicall_fixup6 -.previous - -.section .fixup,"ax" -multicall_fixup6: - addl $4,%esp -multicall_fixup5: - addl $4,%esp -multicall_fixup4: - addl $4,%esp -multicall_fixup3: - addl $4,%esp -multicall_fixup2: - addl $4,%esp -multicall_fixup1: - addl $4,%esp - popl %ebx - movl $-EFAULT,%eax - jmp ret_from_hypervisor_call -.previous - - ALIGN -restore_all_guest: - # First, may need to restore %ds if clobbered by create_bounce_frame - pushl %ss - popl %ds - # Second, create a failsafe copy of DS,ES,FS,GS in case any are bad - leal DS(%esp),%esi - leal FAILSAFE_BUFFER(%ebx),%edi - movsl - movsl - movsl - movsl - # Finally, restore guest registers -- faults will cause failsafe - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax -1: popl %ds -2: popl 
%es -3: popl %fs -4: popl %gs - addl $4,%esp -5: iret -.section .fixup,"ax" -10: subl $4,%esp - pushl %gs -9: pushl %fs -8: pushl %es -7: pushl %ds -6: pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - pushl %ss - popl %ds - pushl %ss - popl %es - jmp failsafe_callback -.previous -.section __ex_table,"a" - .align 4 - .long 1b,6b - .long 2b,7b - .long 3b,8b - .long 4b,9b - .long 5b,10b -.previous - -/* No special register assumptions */ -failsafe_callback: - GET_CURRENT(%ebx) - movzwl PROCESSOR(%ebx),%eax - shl $4,%eax - lea guest_trap_bounce(%eax),%edx - movl FAILSAFE_ADDR(%ebx),%eax - movl %eax,GTB_EIP(%edx) - movl FAILSAFE_SEL(%ebx),%eax - movw %ax,GTB_CS(%edx) - call create_bounce_frame - subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame - leal FAILSAFE_BUFFER(%ebx),%ebp - movl 0(%ebp),%eax # DS -FAULT1: movl %eax,(%esi) - movl 4(%ebp),%eax # ES -FAULT2: movl %eax,4(%esi) - movl 8(%ebp),%eax # FS -FAULT3: movl %eax,8(%esi) - movl 12(%ebp),%eax # GS -FAULT4: movl %eax,12(%esi) - movl %esi,OLDESP(%esp) - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - addl $20,%esp # skip DS/ES/FS/GS/ORIG_EAX -FAULT5: iret - - - ALIGN -# Simple restore -- we should never fault as we we will only interrupt ring 0 -# when sane values have been placed in all registers. The only exception is -# NMI, which may interrupt before good values have been placed in DS-GS. -# The NMI return code deals with this problem itself. 
-restore_all_xen: - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - popl %ds - popl %es - popl %fs - popl %gs - addl $4,%esp - iret - - ALIGN -ENTRY(hypervisor_call) - pushl %eax # save orig_eax - SAVE_ALL - GET_CURRENT(%ebx) - andl $255,%eax - call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) - -ret_from_hypervisor_call: - movl %eax,EAX(%esp) # save the return value - -test_all_events: - xorl %ecx,%ecx - notl %ecx - cli # tests must not race interrupts -/*test_softirqs:*/ - movzwl PROCESSOR(%ebx),%eax - shl $6,%eax # sizeof(irq_cpustat) == 64 - test %ecx,SYMBOL_NAME(irq_stat)(%eax,1) - jnz process_softirqs -/*test_hyp_events:*/ - testw %cx, HYP_EVENTS(%ebx) - jnz process_hyp_events -/*test_guest_events:*/ - movl SHARED_INFO(%ebx),%eax - testb $0xFF,UPCALL_MASK(%eax) - jnz restore_all_guest - testb $0xFF,UPCALL_PENDING(%eax) - jz restore_all_guest - movb $1,UPCALL_MASK(%eax) # Upcalls are masked during delivery -/*process_guest_events:*/ - movzwl PROCESSOR(%ebx),%edx - shl $4,%edx # sizeof(guest_trap_bounce) == 16 - lea guest_trap_bounce(%edx),%edx - movl EVENT_ADDR(%ebx),%eax - movl %eax,GTB_EIP(%edx) - movl EVENT_SEL(%ebx),%eax - movw %ax,GTB_CS(%edx) - call create_bounce_frame - jmp restore_all_guest - - ALIGN -process_softirqs: - sti - call SYMBOL_NAME(do_softirq) - jmp test_all_events - - ALIGN -process_hyp_events: - sti - call SYMBOL_NAME(do_hyp_events) - jmp test_all_events - -/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */ -/* {EIP, CS, EFLAGS, [ESP, SS]} */ -/* %edx == guest_trap_bounce, %ebx == task_struct */ -/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. 
*/ -create_bounce_frame: - mov CS+4(%esp),%cl - test $2,%cl - jz 1f /* jump if returning to an existing ring-1 activation */ - /* obtain ss/esp from TSS -- no current ring-1 activations */ - movzwl PROCESSOR(%ebx),%eax - /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */ - movl %eax, %ecx - shll $7, %ecx - shll $13, %eax - addl %ecx,%eax - addl $init_tss + 12,%eax - movl (%eax),%esi /* tss->esp1 */ -FAULT6: movl 4(%eax),%ds /* tss->ss1 */ - /* base of stack frame must contain ss/esp (inter-priv iret) */ - subl $8,%esi - movl OLDESP+4(%esp),%eax -FAULT7: movl %eax,(%esi) - movl OLDSS+4(%esp),%eax -FAULT8: movl %eax,4(%esi) - jmp 2f -1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */ - movl OLDESP+4(%esp),%esi -FAULT9: movl OLDSS+4(%esp),%ds -2: /* Construct a stack frame: EFLAGS, CS/EIP */ - subl $12,%esi - movl EIP+4(%esp),%eax -FAULT10:movl %eax,(%esi) - movl CS+4(%esp),%eax -FAULT11:movl %eax,4(%esi) - movl EFLAGS+4(%esp),%eax -FAULT12:movl %eax,8(%esi) - /* Rewrite our stack frame and return to ring 1. */ - /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. 
*/ - andl $0xfffcbeff,%eax - movl %eax,EFLAGS+4(%esp) - movl %ds,OLDSS+4(%esp) - movl %esi,OLDESP+4(%esp) - movzwl %es:GTB_CS(%edx),%eax - movl %eax,CS+4(%esp) - movl %es:GTB_EIP(%edx),%eax - movl %eax,EIP+4(%esp) - ret - - -.section __ex_table,"a" - .align 4 - .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack - .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack - .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack - .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack - .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret - .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector - .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack - .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack - .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector - .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack - .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack - .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack - .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack - .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack -.previous - -# This handler kills domains which experience unrecoverable faults. 
-.section .fixup,"ax" -crash_domain_fixup1: - subl $4,%esp - SAVE_ALL - jmp crash_domain -crash_domain_fixup2: - addl $4,%esp -crash_domain_fixup3: - pushl %ss - popl %ds - jmp crash_domain -.previous - - ALIGN -process_guest_exception_and_events: - movzwl PROCESSOR(%ebx),%eax - shl $4,%eax - lea guest_trap_bounce(%eax),%edx - testb $~0,GTB_FLAGS(%edx) - jz test_all_events - call create_bounce_frame # just the basic frame - mov %es:GTB_FLAGS(%edx),%cl - test $GTBF_TRAP_NOCODE,%cl - jnz 2f - subl $4,%esi # push error_code onto guest frame - movl %es:GTB_ERROR_CODE(%edx),%eax -FAULT13:movl %eax,(%esi) - test $GTBF_TRAP_CR2,%cl - jz 1f - subl $4,%esi # push %cr2 onto guest frame - movl %es:GTB_CR2(%edx),%eax -FAULT14:movl %eax,(%esi) -1: movl %esi,OLDESP(%esp) -2: push %es # unclobber %ds - pop %ds - movb $0,GTB_FLAGS(%edx) - jmp test_all_events - - ALIGN -ENTRY(ret_from_intr) - GET_CURRENT(%ebx) - movb CS(%esp),%al - testb $3,%al # return to non-supervisor? - jne test_all_events - jmp restore_all_xen - -ENTRY(divide_error) - pushl $0 # no error code - pushl $ SYMBOL_NAME(do_divide_error) - ALIGN -error_code: - pushl %fs - pushl %es - pushl %ds - pushl %eax - xorl %eax,%eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - cld - movl %gs,%ecx - movl ORIG_EAX(%esp), %esi # get the error code - movl GS(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, GS(%esp) - movl $(__HYPERVISOR_DS),%edx - movl %edx,%ds - movl %edx,%es - movl %edx,%fs - movl %edx,%gs - movl %esp,%edx - pushl %esi # push the error code - pushl %edx # push the pt_regs pointer - GET_CURRENT(%ebx) - call *%edi - addl $8,%esp - movb CS(%esp),%al - testb $3,%al - je restore_all_xen - jmp process_guest_exception_and_events - -ENTRY(coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_error) - jmp error_code - -ENTRY(simd_coprocessor_error) - pushl $0 - pushl $ SYMBOL_NAME(do_simd_coprocessor_error) - jmp 
error_code - -ENTRY(device_not_available) - pushl $0 - pushl $SYMBOL_NAME(math_state_restore) - jmp error_code - -ENTRY(debug) - pushl $0 - pushl $ SYMBOL_NAME(do_debug) - jmp error_code - -ENTRY(int3) - pushl $0 - pushl $ SYMBOL_NAME(do_int3) - jmp error_code - -ENTRY(overflow) - pushl $0 - pushl $ SYMBOL_NAME(do_overflow) - jmp error_code - -ENTRY(bounds) - pushl $0 - pushl $ SYMBOL_NAME(do_bounds) - jmp error_code - -ENTRY(invalid_op) - pushl $0 - pushl $ SYMBOL_NAME(do_invalid_op) - jmp error_code - -ENTRY(coprocessor_segment_overrun) - pushl $0 - pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) - jmp error_code - -ENTRY(invalid_TSS) - pushl $ SYMBOL_NAME(do_invalid_TSS) - jmp error_code - -ENTRY(segment_not_present) - pushl $ SYMBOL_NAME(do_segment_not_present) - jmp error_code - -ENTRY(stack_segment) - pushl $ SYMBOL_NAME(do_stack_segment) - jmp error_code - -ENTRY(general_protection) - pushl $ SYMBOL_NAME(do_general_protection) - jmp error_code - -ENTRY(alignment_check) - pushl $ SYMBOL_NAME(do_alignment_check) - jmp error_code - -ENTRY(page_fault) - pushl $ SYMBOL_NAME(do_page_fault) - jmp error_code - -ENTRY(machine_check) - pushl $0 - pushl $ SYMBOL_NAME(do_machine_check) - jmp error_code - -ENTRY(spurious_interrupt_bug) - pushl $0 - pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) - jmp error_code - -ENTRY(nmi) - # Save state but do not trash the segment registers! - # We may otherwise be unable to reload them or copy them to ring 1. - pushl %eax - SAVE_ALL_NOSEGREGS - - # Check for hardware problems. These are always fatal so we can - # reload DS and ES when handling them. - inb $0x61,%al - testb $0x80,%al - jne nmi_parity_err - testb $0x40,%al - jne nmi_io_err - movl %eax,%ebx - - # Okay, its almost a normal NMI tick. We can only process it if: - # A. We are the outermost Xen activation (in which case we have - # the selectors safely saved on our stack) - # B. DS-GS all contain sane Xen values. 
- # In all other cases we bail without touching DS-GS, as we have - # interrupted an enclosing Xen activation in tricky prologue or - # epilogue code. - movb CS(%esp),%al - testb $3,%al - jne do_watchdog_tick - movl DS(%esp),%eax - cmpw $(__HYPERVISOR_DS),%ax - jne nmi_badseg - movl ES(%esp),%eax - cmpw $(__HYPERVISOR_DS),%ax - jne nmi_badseg - movl FS(%esp),%eax - cmpw $(__HYPERVISOR_DS),%ax - jne nmi_badseg - movl GS(%esp),%eax - cmpw $(__HYPERVISOR_DS),%ax - jne nmi_badseg - -do_watchdog_tick: - movl $(__HYPERVISOR_DS),%edx - movl %edx,%ds - movl %edx,%es - movl %esp,%edx - pushl %ebx # reason - pushl %edx # regs - call SYMBOL_NAME(do_nmi) - addl $8,%esp - movb CS(%esp),%al - testb $3,%al - je restore_all_xen - GET_CURRENT(%ebx) - jmp restore_all_guest - -nmi_badseg: - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - addl $20,%esp - iret - -nmi_parity_err: - movl $(__HYPERVISOR_DS),%edx - movl %edx,%ds - movl %edx,%es - jmp SYMBOL_NAME(mem_parity_error) - -nmi_io_err: - movl $(__HYPERVISOR_DS),%edx - movl %edx,%ds - movl %edx,%es - jmp SYMBOL_NAME(io_check_error) - -.data -ENTRY(hypervisor_call_table) - .long SYMBOL_NAME(do_set_trap_table) /* 0 */ - .long SYMBOL_NAME(do_mmu_update) - .long SYMBOL_NAME(do_console_write) - .long SYMBOL_NAME(do_set_gdt) - .long SYMBOL_NAME(do_stack_switch) - .long SYMBOL_NAME(do_set_callbacks) /* 5 */ - .long SYMBOL_NAME(do_ni_syscall) # do_net_io_op - .long SYMBOL_NAME(do_fpu_taskswitch) - .long SYMBOL_NAME(do_sched_op) - .long SYMBOL_NAME(do_dom0_op) - .long SYMBOL_NAME(do_ni_syscall) /* 10 */ # do_network_op - .long SYMBOL_NAME(do_ni_syscall) # do_block_io_op - .long SYMBOL_NAME(do_set_debugreg) - .long SYMBOL_NAME(do_get_debugreg) - .long SYMBOL_NAME(do_update_descriptor) - .long SYMBOL_NAME(do_set_fast_trap) /* 15 */ - .long SYMBOL_NAME(do_dom_mem_op) - .long SYMBOL_NAME(do_multicall) - .long SYMBOL_NAME(do_kbd_op) - .long SYMBOL_NAME(do_update_va_mapping) - .long SYMBOL_NAME(do_set_timer_op) 
/* 20 */ - .long SYMBOL_NAME(do_event_channel_op) - .long SYMBOL_NAME(do_xen_version) - .long SYMBOL_NAME(do_console_io) - .long SYMBOL_NAME(do_physdev_op) - .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 25 */ - .rept NR_syscalls-((.-hypervisor_call_table)/4) - .long SYMBOL_NAME(do_ni_syscall) - .endr diff --git a/xen/arch/i386/extable.c b/xen/arch/i386/extable.c deleted file mode 100644 index af37b86013..0000000000 --- a/xen/arch/i386/extable.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * linux/arch/i386/mm/extable.c - */ - -#include -#include -#include -#include - -extern const struct exception_table_entry __start___ex_table[]; -extern const struct exception_table_entry __stop___ex_table[]; - -static inline unsigned long -search_one_table(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid->fixup; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return 0; -} - -extern spinlock_t modlist_lock; - -unsigned long -search_exception_table(unsigned long addr) -{ - unsigned long ret = 0; - -#ifndef CONFIG_MODULES - /* There is only the kernel to search. */ - ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); - return ret; -#else - unsigned long flags; - /* The kernel is the last "module" -- no need to treat it special. 
*/ - struct module *mp; - - spin_lock_irqsave(&modlist_lock, flags); - for (mp = module_list; mp != NULL; mp = mp->next) { - if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING))) - continue; - ret = search_one_table(mp->ex_table_start, - mp->ex_table_end - 1, addr); - if (ret) - break; - } - spin_unlock_irqrestore(&modlist_lock, flags); - return ret; -#endif -} diff --git a/xen/arch/i386/flushtlb.c b/xen/arch/i386/flushtlb.c deleted file mode 100644 index c53f52161c..0000000000 --- a/xen/arch/i386/flushtlb.c +++ /dev/null @@ -1,40 +0,0 @@ -/****************************************************************************** - * flushtlb.c - * - * TLB flushes are timestamped using a global virtual 'clock' which ticks - * on any TLB flush on any processor. - * - * Copyright (c) 2003, K A Fraser - */ - -#include -#include -#include -#include - -u32 tlbflush_clock; -u32 tlbflush_time[NR_CPUS]; - -void tlb_clocktick(void) -{ - u32 y, ny; - - /* Tick the clock. 'y' contains the current time after the tick. */ - ny = tlbflush_clock; - do { -#ifdef CONFIG_SMP - if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) ) - { - raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ); - y = tlbflush_clock; - break; - } -#else - y = ny+1; -#endif - } - while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) ); - - /* Update this CPU's timestamp to new time. 
*/ - tlbflush_time[smp_processor_id()] = y; -} diff --git a/xen/arch/i386/i387.c b/xen/arch/i386/i387.c deleted file mode 100644 index 34cd1c9b3b..0000000000 --- a/xen/arch/i386/i387.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * linux/arch/i386/kernel/i387.c - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include - -void init_fpu(void) -{ - __asm__("fninit"); - if ( cpu_has_xmm ) load_mxcsr(0x1f80); - set_bit(PF_DONEFPUINIT, ¤t->flags); -} - -static inline void __save_init_fpu( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - asm volatile( "fxsave %0 ; fnclex" - : "=m" (tsk->thread.i387.fxsave) ); - } else { - asm volatile( "fnsave %0 ; fwait" - : "=m" (tsk->thread.i387.fsave) ); - } - clear_bit(PF_USEDFPU, &tsk->flags); -} - -void save_init_fpu( struct task_struct *tsk ) -{ - /* - * The guest OS may have set the 'virtual STTS' flag. - * This causes us to set the real flag, so we'll need - * to temporarily clear it while saving f-p state. - */ - if ( test_bit(PF_GUEST_STTS, &tsk->flags) ) clts(); - __save_init_fpu(tsk); - stts(); -} - -void restore_fpu( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - asm volatile( "fxrstor %0" - : : "m" (tsk->thread.i387.fxsave) ); - } else { - asm volatile( "frstor %0" - : : "m" (tsk->thread.i387.fsave) ); - } -} diff --git a/xen/arch/i386/i8259.c b/xen/arch/i386/i8259.c deleted file mode 100644 index 58ecb12553..0000000000 --- a/xen/arch/i386/i8259.c +++ /dev/null @@ -1,470 +0,0 @@ -/****************************************************************************** - * i8259.c - * - * Well, this is required for SMP systems as well, as it build interrupt - * tables for IO APICS as well as uniprocessor 8259-alikes. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -/* - * Common place to define all x86 IRQ vectors - * - * This builds up the IRQ handler stubs using some ugly macros in irq.h - * - * These macros create the low-level assembly IRQ routines that save - * register context and call do_IRQ(). do_IRQ() then does all the - * operations that are needed to keep the AT (or SMP IOAPIC) - * interrupt-controller happy. - */ - -BUILD_COMMON_IRQ() - -#define BI(x,y) \ - BUILD_IRQ(x##y) - -#define BUILD_16_IRQS(x) \ - BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ - BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ - BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ - BI(x,c) BI(x,d) BI(x,e) BI(x,f) - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - BUILD_16_IRQS(0x0) - -#ifdef CONFIG_X86_IO_APIC -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. 
- * - * (these are usually mapped into the 0x30-0xff vector range) - */ - BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) - BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) - BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) - BUILD_16_IRQS(0xc) -#endif - -#undef BUILD_16_IRQS -#undef BI - - -/* - * The following vectors are part of the Linux architecture, there - * is no hardware IRQ pin equivalent for them, they are triggered - * through the ICC by us (IPIs) - */ -#ifdef CONFIG_SMP - BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR) - BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) - BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) -#endif - -/* - * every pentium local APIC has two 'local interrupts', with a - * soft-definable vector attached to both interrupts, one of - * which is a timer interrupt, the other one is error counter - * overflow. Linux uses the local APIC timer interrupt to get - * a much simpler SMP time architecture: - */ -#ifdef CONFIG_X86_LOCAL_APIC - BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) - BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) - BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#endif - -#define IRQ(x,y) \ - IRQ##x##y##_interrupt - -#define IRQLIST_16(x) \ - IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ - IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ - IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ - IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) - - void (*interrupt[NR_IRQS])(void) = { - IRQLIST_16(0x0), - -#ifdef CONFIG_X86_IO_APIC - IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), - IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), - IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), - IRQLIST_16(0xc) -#endif - }; - -#undef IRQ -#undef IRQLIST_16 - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. 
- * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; - -static void end_8259A_irq (unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - enable_8259A_irq(irq); -} - -#define shutdown_8259A_irq disable_8259A_irq - -void mask_and_ack_8259A(unsigned int); - -static unsigned int startup_8259A_irq(unsigned int irq) -{ - enable_8259A_irq(irq); - return 0; /* never anything pending */ -} - -static struct hw_interrupt_type i8259A_irq_type = { - "XT-PIC", - startup_8259A_irq, - shutdown_8259A_irq, - enable_8259A_irq, - disable_8259A_irq, - mask_and_ack_8259A, - end_8259A_irq, - NULL -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -static unsigned int cached_irq_mask = 0xffff; - -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21 (__byte(0,cached_irq_mask)) -#define cached_A1 (__byte(1,cached_irq_mask)) - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. 
- */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_A1,0xA1); - else - outb(cached_21,0x21); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_A1,0xA1); - else - outb(cached_21,0x21); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,0xA0); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecesserily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... 
- */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - - handle_real_irq: - if (irq & 8) { - inb(0xA1); /* DUMMY - (do we need this?) */ - outb(cached_A1,0xA1); - outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ - outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(0x21); /* DUMMY - (do we need this?) */ - outb(cached_21,0x21); - outb(0x60+irq,0x20); /* 'Specific EOI' to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - - spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk("spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -void __init init_8259A(int auto_eoi) -{ - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-2 */ - - /* - * outb_p - this has to work on a wide range of PC hardware. 
- */ - outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ - outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) - outb_p(0x03, 0x21); /* master does Auto EOI */ - else - outb_p(0x01, 0x21); /* master expects normal EOI */ - - outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ - outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ - outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode - is to be investigated) */ - - if (auto_eoi) - /* - * in AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_irq_type.ack = disable_8259A_irq; - else - i8259A_irq_type.ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_21, 0x21); /* restore master IRQ mask */ - outb(cached_A1, 0xA1); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ - -static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; - -void __init init_ISA_irqs (void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = 0; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - irq_desc[i].handler = &i8259A_irq_type; - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].handler = &no_irq_type; - } - } -} - -void __init init_IRQ(void) -{ - int i; - - init_ISA_irqs(); - - /* - * Cover the whole vector space, no vector can escape - * us. 
(some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < NR_IRQS; i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != HYPERVISOR_CALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - -#ifdef CONFIG_SMP - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt); - - /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC - /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -#endif - - /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ -#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */ -#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ) - outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ - outb_p(LATCH & 0xff , 0x40); /* LSB */ - outb(LATCH >> 8 , 0x40); /* MSB */ - - setup_irq(2, &irq2); -} - diff --git a/xen/arch/i386/idle0_task.c b/xen/arch/i386/idle0_task.c deleted file mode 100644 index b956fdff40..0000000000 --- a/xen/arch/i386/idle0_task.c +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include -#include - -struct task_struct idle0_task = IDLE0_TASK(idle0_task); - -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data.cacheline_aligned - * section. 
Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; - diff --git a/xen/arch/i386/io_apic.c b/xen/arch/i386/io_apic.c deleted file mode 100644 index 9c94e787e0..0000000000 --- a/xen/arch/i386/io_apic.c +++ /dev/null @@ -1,1944 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_IO_APIC - -#undef APIC_LOCKUP_DEBUG - -#define APIC_LOCKUP_DEBUG - -static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; - -unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL; -unsigned char int_delivery_mode = dest_LowestPrio; - - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -static struct irq_pin_list { - int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; - -/* - * The common case is 1:1 IRQ<->pin mappings. 
Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void __init add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - static int first_free_entry = NR_IRQS; - struct irq_pin_list *entry = irq_2_pin + irq; - - while (entry->next) - entry = irq_2_pin + entry->next; - - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: whoops"); - } - entry->apic = apic; - entry->pin = pin; -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - - while (1) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - } - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_write(entry->apic, 0x10 + R + pin*2, reg); \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ - FINAL; \ -} - -#define DO_ACTION(name,R,ACTION, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) -DO_ACTION( __edge, 0, &= 0xffff7fff, ) -DO_ACTION( __level, 0, |= 0x00008000, ) - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned 
int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - unsigned long flags; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - spin_lock_irqsave(&ioapic_lock, flags); - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (entry.delivery_mode == dest_SMI) - return; - - /* - * Disable it in the IO-APIC irq-routing table: - */ - memset(&entry, 0, sizeof(entry)); - entry.mask = 1; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -static void set_ioapic_affinity (unsigned int irq, unsigned long mask) -{ - unsigned long flags; - - /* - * Only the first 8 bits are valid. - */ - mask = mask << 24; - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = mask, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#define balance_irq(_irq) ((void)0) - -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. 
- */ - -#define MAX_PIRQS 8 -int pirq_entries [MAX_PIRQS]; -int pirqs_enabled; - -int skip_ioapic_setup; -#if 0 - -static int __init noioapic_setup(char *str) -{ - skip_ioapic_setup = 1; - return 1; -} - -__setup("noapic", noioapic_setup); - -static int __init ioapic_setup(char *str) -{ - skip_ioapic_setup = 0; - return 1; -} - -__setup("apic", ioapic_setup); - - - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); - -#endif - -/* - * Find the IRQ entry number of a certain pin. - */ -static int __init find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) - - return mp_irqs[i].mpc_dstirq; - } - return -1; -} - -/* - * Find a specific PCI IRQ entry. 
- * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) { - printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) - break; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && - !mp_irqs[i].mpc_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -/* - * EISA Edge/Level control register, ELCR - */ -static int __init EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) -#define default_EISA_polarity(idx) (0) - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. 
*/ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) (0) - -static int __init MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mpc_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mpc_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - polarity = default_ISA_polarity(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - polarity = default_EISA_polarity(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - polarity = default_PCI_polarity(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - polarity = default_MCA_polarity(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int __init MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mpc_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) - { - case 0: /* conforms, ie. 
bus-type dependent */ - { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - trigger = default_ISA_trigger(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - trigger = default_PCI_trigger(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } - break; - } - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mpc_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - case MP_BUS_EISA: - case MP_BUS_MCA: - { - irq = mp_irqs[idx].mpc_srcbusirq; - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - break; - } - default: - { - printk(KERN_ERR "unknown bus type %d.\n",bus); - irq = 0; - break; - } - } - - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. 
- */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } - return irq; -} - -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic,pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} - -int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; - -static int __init assign_irq_vector(int irq) -{ - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - if (IO_APIC_VECTOR(irq) > 0) - return IO_APIC_VECTOR(irq); -next: - current_vector += 8; - - /* XXX Skip the guestOS -> Xen syscall vector! XXX */ - if (current_vector == HYPERVISOR_CALL_VECTOR) goto next; - /* XXX Skip the Linux/BSD fast-trap vector! XXX */ - if (current_vector == 0x80) goto next; - - if (current_vector > FIRST_SYSTEM_VECTOR) { - offset++; - current_vector = FIRST_DEVICE_VECTOR + offset; - } - - if (current_vector == FIRST_SYSTEM_VECTOR) - panic("ran out of interrupt sources!"); - - IO_APIC_VECTOR(irq) = current_vector; - return current_vector; -} - -extern void (*interrupt[NR_IRQS])(void); - -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. 
- */ - -static unsigned int startup_edge_ioapic_irq(unsigned int irq); -#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq -#define enable_edge_ioapic_irq unmask_IO_APIC_irq -static void disable_edge_ioapic_irq (unsigned int irq); -static void ack_edge_ioapic_irq(unsigned int irq); -static void end_edge_ioapic_irq (unsigned int i); -static struct hw_interrupt_type ioapic_edge_irq_type = { - "IO-APIC-edge", - startup_edge_ioapic_irq, - shutdown_edge_ioapic_irq, - enable_edge_ioapic_irq, - disable_edge_ioapic_irq, - ack_edge_ioapic_irq, - end_edge_ioapic_irq, - set_ioapic_affinity, -}; - -static unsigned int startup_level_ioapic_irq (unsigned int irq); -#define shutdown_level_ioapic_irq mask_IO_APIC_irq -#define enable_level_ioapic_irq unmask_IO_APIC_irq -#define disable_level_ioapic_irq mask_IO_APIC_irq -static void mask_and_ack_level_ioapic_irq (unsigned int irq); -static void end_level_ioapic_irq (unsigned int irq); -static struct hw_interrupt_type ioapic_level_irq_type = { - "IO-APIC-level", - startup_level_ioapic_irq, - shutdown_level_ioapic_irq, - enable_level_ioapic_irq, - disable_level_ioapic_irq, - mask_and_ack_level_ioapic_irq, - end_level_ioapic_irq, - set_ioapic_affinity, -}; - -void __init setup_IO_APIC_irqs(void) -{ - struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; - unsigned long flags; - - printk(KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = (INT_DEST_ADDR_MODE != 0); - entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = target_cpus(); - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); - first_notcon = 0; - } else - printk(", 
%d-%d", mp_ioapics[apic].mpc_apicid, pin); - continue; - } - - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - } - - irq = pin_2_irq(idx, apic, pin); - /* - * skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum - */ - if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) - && (apic != 0) && (irq == 0)) - continue; - else - add_pin_to_irq(irq, apic, pin); - - if (!apic && !IO_APIC_IRQ(irq)) - continue; - - if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; - - if (IO_APIC_irq_trigger(irq)) - irq_desc[irq].handler = &ioapic_level_irq_type; - else - irq_desc[irq].handler = &ioapic_edge_irq_type; - - set_intr_gate(vector, interrupt[irq]); - - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - } - - if (!first_notcon) - printk(" not connected.\n"); -} - -/* - * Set up the 8259A-master output pin as broadcast to all - * CPUs. - */ -void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) -{ - struct IO_APIC_route_entry entry; - unsigned long flags; - - memset(&entry,0,sizeof(entry)); - - disable_8259A_irq(0); - - /* mask LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = (INT_DEST_ADDR_MODE != 0); - entry.mask = 0; /* unmask IRQ now */ - entry.dest.logical.logical_dest = target_cpus(); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI mode ... 
- */ - irq_desc[0].handler = &ioapic_edge_irq_type; - - /* - * Add it to the IO-APIC irq-routing table: - */ - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); - - enable_8259A_irq(0); -} - -void __init UNEXPECTED_IO_APIC(void) -{ - printk(KERN_WARNING - "An unexpected IO-APIC was found. If this kernel release is less than\n" - "three months old please report this to linux-smp@vger.kernel.org\n"); -} - -void __init print_IO_APIC(void) -{ -#ifndef NDEBUG - int apic, i; - struct IO_APIC_reg_00 reg_00; - struct IO_APIC_reg_01 reg_01; - struct IO_APIC_reg_02 reg_02; - struct IO_APIC_reg_03 reg_03; - unsigned long flags; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_00 = io_apic_read(apic, 0); - *(int *)®_01 = io_apic_read(apic, 1); - if (reg_01.version >= 0x10) - *(int *)®_02 = io_apic_read(apic, 2); - if (reg_01.version >= 0x20) - *(int *)®_03 = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)®_00); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.delivery_type); - printk(KERN_DEBUG "....... 
: LTS : %X\n", reg_00.LTS); - if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2) - UNEXPECTED_IO_APIC(); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries); - if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */ - (reg_01.entries != 0x17) && /* typical ISA+PCI boards */ - (reg_01.entries != 0x1b) && /* Compaq Proliant boards */ - (reg_01.entries != 0x1f) && /* dual Xeon boards */ - (reg_01.entries != 0x22) && /* bigger Xeon boards */ - (reg_01.entries != 0x2E) && - (reg_01.entries != 0x3F) - ) - UNEXPECTED_IO_APIC(); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version); - if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ - (reg_01.version != 0x02) && /* VIA */ - (reg_01.version != 0x03) && /* later VIA */ - (reg_01.version != 0x10) && /* oldest IO-APICs */ - (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ - (reg_01.version != 0x13) && /* Xeon IO-APICs */ - (reg_01.version != 0x20) /* Intel P64H (82806 AA) */ - ) - UNEXPECTED_IO_APIC(); - if (reg_01.__reserved_1 || reg_01.__reserved_2) - UNEXPECTED_IO_APIC(); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.version >= 0x10 && *(int *)®_02 != *(int *)®_01) { - printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)®_02); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration); - if (reg_02.__reserved_1 || reg_02.__reserved_2) - UNEXPECTED_IO_APIC(); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. 
- */ - if (reg_01.version >= 0x20 && *(int *)®_03 != *(int *)®_02 && - *(int *)®_03 != *(int *)®_01) { - printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)®_03); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT); - if (reg_03.__reserved_1) - UNEXPECTED_IO_APIC(); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); - - for (i = 0; i <= reg_01.entries; i++) { - struct IO_APIC_route_entry entry; - - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk(KERN_DEBUG " %02x %03X %02X ", - i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) - continue; - printk(KERN_DEBUG "IRQ%d ", i); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); -#endif -} - - -#if 0 /* Maybe useful for debugging, but not currently used anywhere. */ - -static void print_APIC_bitfield (int base) -{ - unsigned int v; - int i, j; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... 
APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -void print_all_local_APICs (void) -{ - smp_call_function(print_local_APIC, NULL, 1, 1); - print_local_APIC(NULL); -} - -void /*__init*/ print_PIC(void) -{ - extern spinlock_t i8259A_lock; - unsigned int v, flags; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); -} - -#endif /* 0 */ - - -static void __init enable_IO_APIC(void) -{ - struct IO_APIC_reg_01 reg_01; - int i; - unsigned long flags; - - for (i = 0; i < PIN_MAP_SIZE; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (i = 0; i < nr_ioapics; i++) { - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_01 = io_apic_read(i, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.entries+1; - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - disconnect_bsp_APIC(); -} - -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 - */ - -static void __init setup_ioapic_ids_from_mpc (void) -{ - struct IO_APIC_reg_00 reg_00; - unsigned long phys_id_present_map = phys_cpu_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - if (clustered_apic_mode) - /* We don't have a good way to do this yet - hack */ - phys_id_present_map = (u_long) 0xf; - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_00 = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mpc_apicid; - - if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); - printk(KERN_ERR "... fixing up to %d. 
(tell your hw vendor)\n", - reg_00.ID); - mp_ioapics[apic].mpc_apicid = reg_00.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs. - */ - if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) && - (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); - for (i = 0; i < 0xf; i++) - if (!(phys_id_present_map & (1 << i))) - break; - if (i >= apic_broadcast_id) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - phys_id_present_map |= 1 << i; - mp_ioapics[apic].mpc_apicid = i; - } else { - printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid); - phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid; - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mpc_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_dstapic == old_id) - mp_irqs[i].mpc_dstapic - = mp_ioapics[apic].mpc_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. 
- */ - printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); - - reg_00.ID = mp_ioapics[apic].mpc_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, *(int *)®_00); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_00 = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.ID != mp_ioapics[apic].mpc_apicid) - panic("could not set ID!\n"); - else - printk(" ok.\n"); - } -} - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned int t1 = jiffies; - - sti(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - if (jiffies - t1 > 4) - return 1; - - return 0; -} - -static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... 
- */ - -static unsigned int startup_edge_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -/* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ -static void ack_edge_ioapic_irq(unsigned int irq) -{ - balance_irq(irq); - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) - == (IRQ_PENDING | IRQ_DISABLED)) - mask_IO_APIC_irq(irq); - ack_APIC_irq(); -} - -static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } - - -/* - * Level triggered interrupts can just be masked, - * and shutting down and starting up the interrupt - * is the same as enabling and disabling them -- except - * with a startup need to return a "was pending" value. - * - * Level triggered interrupts are special because we - * do not touch any IO-APIC register while handling - * them. We ack the APIC in the end-IRQ handler, not - * in the start-IRQ-handler. Protection against reentrance - * from the same interrupt is still provided, both by the - * generic IRQ layer and by the fact that an unacked local - * APIC does not accept IRQs. - */ -static unsigned int startup_level_ioapic_irq (unsigned int irq) -{ - unmask_IO_APIC_irq(irq); - - return 0; /* don't check for pending */ -} - -static void mask_and_ack_level_ioapic_irq(unsigned int irq) -{ - unsigned long v; - int i; - - balance_irq(irq); - - mask_IO_APIC_irq(irq); - -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). 
Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = IO_APIC_VECTOR(irq); - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { -#ifdef APIC_LOCKUP_DEBUG - struct irq_pin_list *entry; -#endif - -#ifdef APIC_MISMATCH_DEBUG - atomic_inc(&irq_mis_count); -#endif - spin_lock(&ioapic_lock); - __edge_IO_APIC_irq(irq); -#ifdef APIC_LOCKUP_DEBUG - for (entry = irq_2_pin + irq;;) { - unsigned int reg; - - if (entry->pin == -1) - break; - reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2); - if (reg & 0x00004000) - printk(KERN_CRIT "Aieee!!! Remote IRR" - " still set after unlock!\n"); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -#endif - __level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} - -static void end_level_ioapic_irq(unsigned int irq) -{ - unmask_IO_APIC_irq(irq); -} - -static inline void init_IO_APIC_traps(void) -{ - int irq; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. 
- * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for (irq = 0; irq < NR_IRQS ; irq++) { - if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else - /* Strange. Oh, well.. */ - irq_desc[irq].handler = &no_irq_type; - } - } -} - -static void enable_lapic_irq (unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static void disable_lapic_irq (unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void ack_lapic_irq (unsigned int irq) -{ - ack_APIC_irq(); -} - -static void end_lapic_irq (unsigned int i) { /* nothing */ } - -static struct hw_interrupt_type lapic_irq_type = { - "local-APIC-edge", - NULL, /* startup_irq() not used for IRQ0 */ - NULL, /* shutdown_irq() not used for IRQ0 */ - enable_lapic_irq, - disable_lapic_irq, - ack_lapic_irq, - end_lapic_irq -}; - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. 
--macro - */ -static inline void unlock_ExtINT_logic(void) -{ - int pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - unsigned long flags; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) - return; - - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. 
- */ -static inline void check_timer(void) -{ - extern int timer_ack; - int pin1, pin2; - int vector; - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); - - /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. - */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); - timer_ack = 1; - enable_8259A_irq(0); - - pin1 = find_isa_irq_pin(0, mp_INT); - pin2 = find_isa_irq_pin(0, mp_ExtINT); - - printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (timer_irq_works()) - return; - clear_IO_APIC_pin(0, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); - } - - printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); - if (pin2 != -1) { - printk("\n..... (found pin %d) ...", pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - setup_ExtINT_IRQ0_pin(pin2, vector); - if (timer_irq_works()) { - printk("works.\n"); - if (pin1 != -1) - replace_pin_at_irq(0, 0, pin1, 0, pin2); - else - add_pin_to_irq(0, 0, pin2); - return; - } - /* - * Cleanup, just in case ... 
- */ - clear_IO_APIC_pin(0, pin2); - } - printk(" failed.\n"); - - printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - - disable_8259A_irq(0); - irq_desc[0].handler = &lapic_irq_type; - apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - printk(" works.\n"); - return; - } - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); - printk(" failed.\n"); - - printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); - - init_8259A(0); - make_8259A_irq(0); - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - printk(" works.\n"); - return; - } - printk(" failed :(.\n"); - panic("IO-APIC + timer doesn't work! pester mingo@redhat.com"); -} - -/* - * - * IRQ's that are handled by the old PIC in all cases: - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. - * Linux doesn't really care, as it's not actually used - * for any interrupt handling anyway. - * - There used to be IRQ13 here as well, but all - * MPS-compliant must not use it for FPU coupling and we - * want to use exception 16 anyway. And there are - * systems who connect it to an I/O APIC for other uses. - * Thus we don't mark it special any longer. - * - * Additionally, something is definitely wrong with irq9 - * on PIIX4 boards. - */ -#define PIC_IRQS (1<<2) - -void __init setup_IO_APIC(void) -{ - enable_IO_APIC(); - - io_apic_irqs = ~PIC_IRQS; - printk("ENABLING IO-APIC IRQs\n"); - - /* - * Set up IO-APIC IRQ routing. 
- */ - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); -} - -#endif /* CONFIG_X86_IO_APIC */ - - - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_BOOT - -#define IO_APIC_MAX_ID 15 - -int __init io_apic_get_unique_id (int ioapic, int apic_id) -{ - struct IO_APIC_reg_00 reg_00; - static unsigned long apic_id_map = 0; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (!apic_id_map) - apic_id_map = phys_cpu_present_map; - - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_00 = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= IO_APIC_MAX_ID) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.ID); - apic_id = reg_00.ID; - } - - /* XAPICs do not need unique IDs */ - if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){ - printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", - ioapic, apic_id); - return apic_id; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. 
- */ - if (apic_id_map & (1 << apic_id)) { - - for (i = 0; i < IO_APIC_MAX_ID; i++) { - if (!(apic_id_map & (1 << i))) - break; - } - - if (i == IO_APIC_MAX_ID) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - apic_id_map |= (1 << apic_id); - - if (reg_00.ID != apic_id) { - reg_00.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, *(int *)®_00); - *(int *)®_00 = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); - } - - printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - - -int __init io_apic_get_version (int ioapic) -{ - struct IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_01 = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.version; -} - - -int __init io_apic_get_redir_entries (int ioapic) -{ - struct IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - *(int *)®_01 = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) -{ - struct IO_APIC_route_entry entry; - unsigned long flags; - - if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", - ioapic); - return -EINVAL; - } - - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. 
- */ - - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = dest_LowestPrio; - entry.dest_mode = INT_DELIVERY_MODE; - entry.dest.logical.logical_dest = target_cpus(); - entry.mask = 1; /* Disabled (masked) */ - entry.trigger = edge_level; - entry.polarity = active_high_low; - - add_pin_to_irq(irq, ioapic, pin); - - entry.vector = assign_irq_vector(irq); - - printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); - - if (edge_level) { - irq_desc[irq].handler = &ioapic_level_irq_type; - } else { - irq_desc[irq].handler = &ioapic_edge_irq_type; - } - - set_intr_gate(entry.vector, interrupt[irq]); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return 0; -} - -#endif /*CONFIG_ACPI_BOOT*/ - -extern char opt_leveltrigger[], opt_edgetrigger[]; - -static int __init ioapic_trigger_setup(void) -{ - char *p; - irq_desc_t *desc; - long irq; - - p = opt_leveltrigger; - while ( *p != '\0' ) - { - irq = simple_strtol(p, &p, 10); - if ( (irq <= 0) || (irq >= NR_IRQS) ) - { - printk("IRQ '%ld' out of range in level-trigger list '%s'\n", - irq, opt_leveltrigger); - break; - } - - printk("Forcing IRQ %ld to level-trigger: ", irq); - - desc = &irq_desc[irq]; - spin_lock_irq(&desc->lock); - - if ( desc->handler == &ioapic_level_irq_type ) - { - printk("already level-triggered (no force applied).\n"); - } - else if ( desc->handler != &ioapic_edge_irq_type ) - { - printk("cannot force (can only force IO-APIC-edge IRQs).\n"); - } - else - { - desc->handler = &ioapic_level_irq_type; - __mask_IO_APIC_irq(irq); - __level_IO_APIC_irq(irq); - printk("done.\n"); - } - - spin_unlock_irq(&desc->lock); - - if ( *p == '\0' ) - 
break; - - if ( *p != ',' ) - { - printk("Unexpected character '%c' in level-trigger list '%s'\n", - *p, opt_leveltrigger); - break; - } - - p++; - } - - p = opt_edgetrigger; - while ( *p != '\0' ) - { - irq = simple_strtol(p, &p, 10); - if ( (irq <= 0) || (irq >= NR_IRQS) ) - { - printk("IRQ '%ld' out of range in edge-trigger list '%s'\n", - irq, opt_edgetrigger); - break; - } - - printk("Forcing IRQ %ld to edge-trigger: ", irq); - - desc = &irq_desc[irq]; - spin_lock_irq(&desc->lock); - - if ( desc->handler == &ioapic_edge_irq_type ) - { - printk("already edge-triggered (no force applied).\n"); - } - else if ( desc->handler != &ioapic_level_irq_type ) - { - printk("cannot force (can only force IO-APIC-level IRQs).\n"); - } - else - { - desc->handler = &ioapic_edge_irq_type; - __edge_IO_APIC_irq(irq); - desc->status |= IRQ_PENDING; /* may have lost a masked edge */ - printk("done.\n"); - } - - spin_unlock_irq(&desc->lock); - - if ( *p == '\0' ) - break; - - if ( *p != ',' ) - { - printk("Unexpected character '%c' in edge-trigger list '%s'\n", - *p, opt_edgetrigger); - break; - } - - p++; - } - - return 0; -} - -__initcall(ioapic_trigger_setup); diff --git a/xen/arch/i386/ioremap.c b/xen/arch/i386/ioremap.c deleted file mode 100644 index a0a0f69988..0000000000 --- a/xen/arch/i386/ioremap.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * arch/i386/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. 
- * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ - -#include -#include -#include -#include -#include -#include - -static unsigned long remap_base = IOREMAP_VIRT_START; - -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -void * __ioremap(unsigned long phys_addr, - unsigned long size, - unsigned long flags) -{ - unsigned long vaddr; - unsigned long offset, cur=0, last_addr; - l2_pgentry_t *pl2e; - l1_pgentry_t *pl1e; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if ( (size == 0) || (last_addr < phys_addr) ) - return NULL; - - /* Don't remap the low PCI/ISA area: it's always mapped. */ - if ( (phys_addr >= 0xA0000) && (last_addr < 0x100000) ) - return phys_to_virt(phys_addr); - - if ( (remap_base + size) > (IOREMAP_VIRT_END - 1) ) - { - printk("ioremap: going past end of reserved space!\n"); - return NULL; - } - - /* Mappings have to be page-aligned. */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; - - /* Ok, go for it. */ - vaddr = remap_base; - remap_base += size; - pl2e = &idle_pg_table[l2_table_offset(vaddr)]; - pl1e = l2_pgentry_to_l1(*pl2e++) + l1_table_offset(vaddr); - do { - *pl1e++ = mk_l1_pgentry((phys_addr+cur)|PAGE_HYPERVISOR|flags); - } - while ( (cur += PAGE_SIZE) != size ); - - return (void *)(offset + (char *)vaddr); -} - -void iounmap(void *addr) -{ - /* NOP for now. */ -} diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c deleted file mode 100644 index 8224082fc8..0000000000 --- a/xen/arch/i386/irq.c +++ /dev/null @@ -1,1100 +0,0 @@ -/* - * linux/arch/i386/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines - * instead of just grabbing them. 
Thus setup_irqs with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - */ - -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Linux has a controller-independent x86 interrupt architecture. - * every controller has a 'controller-template', that is used - * by the main code to do the right thing. Each driver-visible - * interrupt source is transparently wired to the apropriate - * controller. Thus drivers need not be aware of the - * interrupt-controller. - * - * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, - * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. - * (IO-APICs assumed to be messaging to Pentium local-APICs) - * - * the code is designed to be easily extended with new/different - * interrupt controllers, without having to do assembly magic. - */ - -/* - * Controller mappings for all interrupt sources: - */ -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = -{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; - -#ifdef CONFIG_SMP -/* NB. XXX We'll want some way of fiddling with this from DOM0. */ -unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; -#endif - -static void __do_IRQ_guest(int irq); - -/* - * Special irq handlers. 
- */ - -void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } - -/* - * Generic no controller code - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves, it doesnt deserve - * a generic callback i think. - */ -#if CONFIG_X86 - printk("unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - */ - ack_APIC_irq(); -#endif -#endif -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - -atomic_t irq_err_count; -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG -atomic_t irq_mis_count; -#endif -#endif - -/* - * Generic, controller-independent functions: - */ - -/* - * Global interrupt locks for SMP. Allow interrupts to come in on any - * CPU, yet make cli/sti act globally to protect critical regions.. - */ - -#ifdef CONFIG_SMP -unsigned char global_irq_holder = 0xff; -unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ - -#define MAXCOUNT 100000000 - -/* - * I had a lockup scenario where a tight loop doing - * spin_unlock()/spin_lock() on CPU#1 was racing with - * spin_lock() on CPU#0. 
CPU#0 should have noticed spin_unlock(), but - * apparently the spin_unlock() information did not make it - * through to CPU#0 ... nasty, is this by design, do we have to limit - * 'memory update oscillation frequency' artificially like here? - * - * Such 'high frequency update' races can be avoided by careful design, but - * some of our major constructs like spinlocks use similar techniques, - * it would be nice to clarify this issue. Set this define to 0 if you - * want to check whether your system freezes. I suspect the delay done - * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but - * i thought that such things are guaranteed by design, since we use - * the 'LOCK' prefix. - */ -#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 - -#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND -# define SYNC_OTHER_CORES(x) udelay(x+1) -#else -/* - * We have to allow irqs to arrive between __sti and __cli - */ -# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") -#endif - -static inline void wait_on_irq(int cpu) -{ - for (;;) { - - /* - * Wait until all interrupts are gone. Wait - * for bottom half handlers unless we're - * already executing in one.. - */ - if (!irqs_running()) - if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) - break; - - /* Duh, we have to loop. Release the lock to avoid deadlocks */ - clear_bit(0,&global_irq_lock); - - for (;;) { - __sti(); - SYNC_OTHER_CORES(cpu); - __cli(); - if (irqs_running()) - continue; - if (global_irq_lock) - continue; - if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) - continue; - if (!test_and_set_bit(0,&global_irq_lock)) - break; - } - } -} - -/* - * This is called when we want to synchronize with - * interrupts. We may for example tell a device to - * stop sending interrupts: but to make sure there - * are no interrupts that are executing on another - * CPU we need to call this function. 
- */ -void synchronize_irq(void) -{ - if (irqs_running()) { - /* Stupid approach */ - cli(); - sti(); - } -} - -static inline void get_irqlock(int cpu) -{ - if (test_and_set_bit(0,&global_irq_lock)) { - /* do we already hold the lock? */ - if ((unsigned char) cpu == global_irq_holder) - return; - /* Uhhuh.. Somebody else got it. Wait.. */ - do { - do { - rep_nop(); - } while (test_bit(0,&global_irq_lock)); - } while (test_and_set_bit(0,&global_irq_lock)); - } - /* - * We also to make sure that nobody else is running - * in an interrupt context. - */ - wait_on_irq(cpu); - - /* - * Ok, finally.. - */ - global_irq_holder = cpu; -} - -#define EFLAGS_IF_SHIFT 9 - -/* - * A global "cli()" while in an interrupt context - * turns into just a local cli(). Interrupts - * should use spinlocks for the (very unlikely) - * case that they ever want to protect against - * each other. - * - * If we already have local interrupts disabled, - * this will not turn a local disable into a - * global one (problems with spinlocks: this makes - * save_flags+cli+sti usable inside a spinlock). 
- */ -void __global_cli(void) -{ - unsigned int flags; - - __save_flags(flags); - if (flags & (1 << EFLAGS_IF_SHIFT)) { - int cpu = smp_processor_id(); - __cli(); - if (!local_irq_count(cpu)) - get_irqlock(cpu); - } -} - -void __global_sti(void) -{ - int cpu = smp_processor_id(); - - if (!local_irq_count(cpu)) - release_irqlock(cpu); - __sti(); -} - -/* - * SMP flags value to restore to: - * 0 - global cli - * 1 - global sti - * 2 - local cli - * 3 - local sti - */ -unsigned long __global_save_flags(void) -{ - int retval; - int local_enabled; - unsigned long flags; - int cpu = smp_processor_id(); - - __save_flags(flags); - local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; - /* default to local */ - retval = 2 + local_enabled; - - /* check for global flags if we're not in an interrupt */ - if (!local_irq_count(cpu)) { - if (local_enabled) - retval = 1; - if (global_irq_holder == cpu) - retval = 0; - } - return retval; -} - -void __global_restore_flags(unsigned long flags) -{ - switch (flags) { - case 0: - __global_cli(); - break; - case 1: - __global_sti(); - break; - case 2: - __cli(); - break; - case 3: - __sti(); - break; - default: - printk("global_restore_flags: %08lx (%08lx)\n", - flags, (&flags)[-1]); - } -} - -#endif - -/* - * This should really return information about whether - * we should do bottom half handling etc. Right now we - * end up _always_ checking the bottom half, which is a - * waste of time and is not what some drivers would - * prefer. 
- */ -static int handle_IRQ_event(unsigned int irq, - struct pt_regs * regs, - struct irqaction * action) -{ - int status; - int cpu = smp_processor_id(); - - irq_enter(cpu, irq); - - status = 1; /* Force the "do bottom halves" bit */ - - if (!(action->flags & SA_INTERRUPT)) - __sti(); - - do { - status |= action->flags; - action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - - __cli(); - - irq_exit(cpu, irq); - - return status; -} - -/* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. - */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -inline void disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. 
- */ - -void disable_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - - if (!local_irq_count(smp_processor_id())) { - do { - barrier(); - cpu_relax(); - } while (irq_desc[irq].status & IRQ_INPROGRESS); - } -} - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - desc->status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - break; - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int do_IRQ(struct pt_regs regs) -{ - /* - * We ack quickly, we don't want the irq controller - * thinking we're snobs just because some other CPU has - * disabled global interrupts (we have already done the - * INT_ACK cycles, it's too late to try to pretend to the - * controller that we aren't taking the interrupt). - * - * 0 return value means that this irq is already being - * handled by some other CPU. 
(or is disabled) - */ - int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */ - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; - -#ifdef PERF_COUNTERS - int cpu = smp_processor_id(); - u32 cc_start, cc_end; - - perfc_incra(irqs, cpu); - rdtscl(cc_start); -#endif - - spin_lock(&desc->lock); - desc->handler->ack(irq); - - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* We hook off guest-bound IRQs for special handling. */ - if ( status & IRQ_GUEST ) - { - __do_IRQ_guest(irq); - spin_unlock(&desc->lock); - return 1; - } - - /* - * If the IRQ is disabled for whatever reason, we cannot use the action we - * have. - */ - action = NULL; - if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. Since we set - * PENDING, if another processor is handling a different instance of this - * same irq, the other processor will take care of it. - */ - if (!action) - goto out; - - /* - * Edge triggered interrupts need to remember pending events. This applies - * to any hw interrupts that allow a second instance of the same irq to - * arrive while we are in do_IRQ or in the handler. But the code here only - * handles the _second_ instance of the irq, not the third or fourth. So - * it is mostly useful for irq hardware that does not mask cleanly in an - * SMP environment. 
- */ - for (;;) { - spin_unlock(&desc->lock); - handle_IRQ_event(irq, ®s, action); - spin_lock(&desc->lock); - - if (!(desc->status & IRQ_PENDING)) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; - out: - /* - * The ->end() handler has to deal with interrupts which got disabled - * while the handler was running. - */ - desc->handler->end(irq); - spin_unlock(&desc->lock); - -#ifdef PERF_COUNTERS - rdtscl(cc_end); - - if ( !action || (!(action->flags & SA_NOPROFILE)) ) - { - perfc_adda(irq_time, cpu, cc_end - cc_start); -#ifndef NDEBUG - if ( (cc_end - cc_start) > (cpu_khz * 100) ) - printk("Long interrupt %08x -> %08x\n", cc_start, cc_end); -#endif - } -#endif - - return 1; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. 
- * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - */ - -int request_irq(unsigned int irq, - void (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) - kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - action->mask = 0; - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - - return retval; -} - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. 
- */ - -void free_irq(unsigned int irq, void *dev_id) -{ - irq_desc_t *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - if (action) { - struct irqaction **pp = p; - p = &action->next; - if (action->dev_id != dev_id) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - if (!desc->action) { - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - -#ifdef CONFIG_SMP - /* Wait to make sure it's not being used on another CPU */ - while (desc->status & IRQ_INPROGRESS) { - barrier(); - cpu_relax(); - } -#endif - kfree(action); - return; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return; - } -} - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that - * comes in on to an unassigned handler will get stuck - * with "IRQ_WAITING" cleared and the interrupt - * disabled. - */ - -static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED; - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - unsigned int i; - irq_desc_t *desc; - unsigned long val; - unsigned long s=0, e=0; - - spin_lock(&probe_sem); - /* - * something may have generated an irq long ago and we want to - * flush such a longstanding irq before considering it as spurious. - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!irq_desc[i].action) - irq_desc[i].handler->startup(i); - spin_unlock_irq(&desc->lock); - } - - /* Wait for longstanding interrupts to trigger (20ms delay). 
*/ - rdtscl(s); - do { - synchronize_irq(); - rdtscl(e); - } while ( ((e-s)/ticks_per_usec) < 20000 ); - - /* - * enable any unassigned irqs - * (we must startup again here because if a longstanding irq - * happened in the previous stage, it may have masked itself) - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!desc->action) { - desc->status |= IRQ_AUTODETECT | IRQ_WAITING; - if (desc->handler->startup(i)) - desc->status |= IRQ_PENDING; - } - spin_unlock_irq(&desc->lock); - } - - /* - * Wait for spurious interrupts to trigger (100ms delay). - */ - rdtscl(s); - do { - synchronize_irq(); - rdtscl(e); - } while ( ((e-s)/ticks_per_usec) < 100000 ); - - /* - * Now filter out any obviously spurious interrupts - */ - val = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } else - if (i < 32) - val |= 1 << i; - } - spin_unlock_irq(&desc->lock); - } - - return val; -} - -/* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). - */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. 
- */ -unsigned int probe_irq_mask(unsigned long val) -{ - int i; - unsigned int mask; - - mask = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (i < 16 && !(status & IRQ_WAITING)) - mask |= 1 << i; - - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - spin_unlock(&probe_sem); - - return mask & val; -} - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @val: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldnt happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. 
- */ - -int probe_irq_off(unsigned long val) -{ - int i, irq_found, nr_irqs; - - nr_irqs = 0; - irq_found = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; - } - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - spin_unlock(&probe_sem); - - if (nr_irqs > 1) - irq_found = -irq_found; - return irq_found; -} - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction * new) -{ - int shared = 0; - unsigned long flags; - struct irqaction *old, **p; - irq_desc_t *desc = irq_desc + irq; - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - - if ( desc->status & IRQ_GUEST ) - { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); - desc->handler->startup(irq); - } - - spin_unlock_irqrestore(&desc->lock,flags); - - return 0; -} - - - -/* - * HANDLING OF GUEST-BOUND PHYSICAL IRQS - */ - -#define IRQ_MAX_GUESTS 7 -typedef struct { - u8 nr_guests; - u8 in_flight; - u8 shareable; - struct task_struct *guest[IRQ_MAX_GUESTS]; -} irq_guest_action_t; - -static void __do_IRQ_guest(int irq) -{ - irq_desc_t *desc = &irq_desc[irq]; - irq_guest_action_t *action = (irq_guest_action_t *)desc->action; - struct task_struct *p; - int i; - - for ( i 
= 0; i < action->nr_guests; i++ ) - { - p = action->guest[i]; - if ( !test_and_set_bit(irq, &p->pirq_mask) ) - action->in_flight++; - send_guest_pirq(p, irq); - } -} - -int pirq_guest_unmask(struct task_struct *p) -{ - irq_desc_t *desc; - int i, j, pirq; - u32 m; - shared_info_t *s = p->shared_info; - - for ( i = 0; i < 2; i++ ) - { - m = p->pirq_mask[i]; - while ( (j = ffs(m)) != 0 ) - { - m &= ~(1 << --j); - pirq = (i << 5) + j; - desc = &irq_desc[pirq]; - spin_lock_irq(&desc->lock); - if ( !test_bit(p->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && - test_and_clear_bit(pirq, &p->pirq_mask) && - (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) - desc->handler->end(pirq); - spin_unlock_irq(&desc->lock); - } - } - - return 0; -} - -int pirq_guest_bind(struct task_struct *p, int irq, int will_share) -{ - unsigned long flags; - irq_desc_t *desc = &irq_desc[irq]; - irq_guest_action_t *action; - int rc = 0; - - if ( !IS_CAPABLE_PHYSDEV(p) ) - return -EPERM; - - spin_lock_irqsave(&desc->lock, flags); - - action = (irq_guest_action_t *)desc->action; - - if ( !(desc->status & IRQ_GUEST) ) - { - if ( desc->action != NULL ) - { - DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n", - irq, desc->action->name); - rc = -EBUSY; - goto out; - } - - action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL); - if ( (desc->action = (struct irqaction *)action) == NULL ) - { - DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq); - rc = -ENOMEM; - goto out; - } - - action->nr_guests = 0; - action->in_flight = 0; - action->shareable = will_share; - - desc->depth = 0; - desc->status |= IRQ_GUEST; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); - desc->handler->startup(irq); - - /* Attempt to bind the interrupt target to the correct CPU. 
*/ - if ( desc->handler->set_affinity != NULL ) - desc->handler->set_affinity( - irq, apicid_to_phys_cpu_present(p->processor)); - } - else if ( !will_share || !action->shareable ) - { - DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n", - irq); - rc = -EBUSY; - goto out; - } - - if ( action->nr_guests == IRQ_MAX_GUESTS ) - { - DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq); - rc = -EBUSY; - goto out; - } - - action->guest[action->nr_guests++] = p; - - out: - spin_unlock_irqrestore(&desc->lock, flags); - return rc; -} - -int pirq_guest_unbind(struct task_struct *p, int irq) -{ - unsigned long flags; - irq_desc_t *desc = &irq_desc[irq]; - irq_guest_action_t *action; - int i; - - spin_lock_irqsave(&desc->lock, flags); - - action = (irq_guest_action_t *)desc->action; - - if ( test_and_clear_bit(irq, &p->pirq_mask) && - (--action->in_flight == 0) ) - desc->handler->end(irq); - - if ( action->nr_guests == 1 ) - { - desc->action = NULL; - kfree(action); - desc->status |= IRQ_DISABLED; - desc->status &= ~IRQ_GUEST; - desc->handler->shutdown(irq); - } - else - { - i = 0; - while ( action->guest[i] != p ) - i++; - memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); - action->nr_guests--; - } - - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c deleted file mode 100644 index 9f1eaa465b..0000000000 --- a/xen/arch/i386/mm.c +++ /dev/null @@ -1,412 +0,0 @@ -/****************************************************************************** - * arch/i386/mm.c - * - * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static inline void set_pte_phys(unsigned long vaddr, - l1_pgentry_t entry) -{ - l2_pgentry_t *l2ent; - l1_pgentry_t *l1ent; - - l2ent = &idle_pg_table[l2_table_offset(vaddr)]; - l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr); - *l1ent = entry; - - /* It's enough to flush this one mapping. */ - __flush_tlb_one(vaddr); -} - - -void __set_fixmap(enum fixed_addresses idx, - l1_pgentry_t entry) -{ - unsigned long address = __fix_to_virt(idx); - - if ( likely(idx < __end_of_fixed_addresses) ) - set_pte_phys(address, entry); - else - printk("Invalid __set_fixmap\n"); -} - - -static void __init fixrange_init(unsigned long start, - unsigned long end, - l2_pgentry_t *pg_base) -{ - l2_pgentry_t *l2e; - int i; - unsigned long vaddr, page; - - vaddr = start; - i = l2_table_offset(vaddr); - l2e = pg_base + i; - - for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ ) - { - if ( !l2_pgentry_empty(*l2e) ) - continue; - page = (unsigned long)get_free_page(GFP_KERNEL); - clear_page(page); - *l2e = mk_l2_pgentry(__pa(page) | __PAGE_HYPERVISOR); - vaddr += 1 << L2_PAGETABLE_SHIFT; - } -} - -void __init paging_init(void) -{ - unsigned long addr; - void *ioremap_pt; - int i; - - /* Idle page table 1:1 maps the first part of physical memory. 
*/ - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - idle_pg_table[i] = - mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) | - __PAGE_HYPERVISOR | _PAGE_PSE); - - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - addr = FIXADDR_START & ~((1<> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR); - - /* Create read-only mapping of MPT for guest-OS use. */ - idle_pg_table[READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] = - idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]; - mk_l2_readonly(idle_pg_table + - (READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT)); - - /* Set up mapping cache for domain pages. */ - mapcache = (unsigned long *)get_free_page(GFP_KERNEL); - clear_page(mapcache); - idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR); - - /* Set up linear page table mapping. */ - idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR); - -} - -void __init zap_low_mappings(void) -{ - int i; - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - idle_pg_table[i] = mk_l2_pgentry(0); - flush_tlb_all_pge(); -} - - -long do_stack_switch(unsigned long ss, unsigned long esp) -{ - int nr = smp_processor_id(); - struct tss_struct *t = &init_tss[nr]; - - /* We need to do this check as we load and use SS on guest's behalf. */ - if ( (ss & 3) == 0 ) - return -EPERM; - - current->thread.guestos_ss = ss; - current->thread.guestos_sp = esp; - t->ss1 = ss; - t->esp1 = esp; - - return 0; -} - - -/* Returns TRUE if given descriptor is valid for GDT or LDT. */ -int check_descriptor(unsigned long a, unsigned long b) -{ - unsigned long base, limit; - - /* A not-present descriptor will always fault, so is safe. */ - if ( !(b & _SEGMENT_P) ) - goto good; - - /* - * We don't allow a DPL of zero. 
There is no legitimate reason for - * specifying DPL==0, and it gets rather dangerous if we also accept call - * gates (consider a call gate pointing at another guestos descriptor with - * DPL 0 -- this would get the OS ring-0 privileges). - */ - if ( (b & _SEGMENT_DPL) == 0 ) - goto bad; - - if ( !(b & _SEGMENT_S) ) - { - /* - * System segment: - * 1. Don't allow interrupt or trap gates as they belong in the IDT. - * 2. Don't allow TSS descriptors or task gates as we don't - * virtualise x86 tasks. - * 3. Don't allow LDT descriptors because they're unnecessary and - * I'm uneasy about allowing an LDT page to contain LDT - * descriptors. In any case, Xen automatically creates the - * required descriptor when reloading the LDT register. - * 4. We allow call gates but they must not jump to a private segment. - */ - - /* Disallow everything but call gates. */ - if ( (b & _SEGMENT_TYPE) != 0xc00 ) - goto bad; - - /* Can't allow far jump to a Xen-private segment. */ - if ( !VALID_CODESEL(a>>16) ) - goto bad; - - /* Reserved bits must be zero. */ - if ( (b & 0xe0) != 0 ) - goto bad; - - /* No base/limit check is needed for a call gate. */ - goto good; - } - - /* Check that base/limit do not overlap Xen-private space. */ - base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16); - limit = (b&0xf0000) | (a&0xffff); - limit++; /* We add one because limit is inclusive. */ - if ( (b & _SEGMENT_G) ) - limit <<= 12; - if ( ((base + limit) <= base) || - ((base + limit) > PAGE_OFFSET) ) - goto bad; - - good: - return 1; - bad: - return 0; -} - - -long set_gdt(struct task_struct *p, - unsigned long *frames, - unsigned int entries) -{ - /* NB. There are 512 8-byte entries per GDT page. */ - int i, nr_pages = (entries + 511) / 512; - unsigned long pfn; - struct desc_struct *vgdt; - - /* Check the new GDT. 
*/ - for ( i = 0; i < nr_pages; i++ ) - { - if ( unlikely(frames[i] >= max_page) || - unlikely(!get_page_and_type(&frame_table[frames[i]], - p, PGT_gdt_page)) ) - goto fail; - } - - /* Copy reserved GDT entries to the new GDT. */ - vgdt = map_domain_mem(frames[0] << PAGE_SHIFT); - memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY, - gdt_table + FIRST_RESERVED_GDT_ENTRY, - NR_RESERVED_GDT_ENTRIES*8); - unmap_domain_mem(vgdt); - - /* Tear down the old GDT. */ - for ( i = 0; i < 16; i++ ) - { - if ( (pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i])) != 0 ) - put_page_and_type(&frame_table[pfn]); - p->mm.perdomain_pt[i] = mk_l1_pgentry(0); - } - - /* Install the new GDT. */ - for ( i = 0; i < nr_pages; i++ ) - p->mm.perdomain_pt[i] = - mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR); - - SET_GDT_ADDRESS(p, GDT_VIRT_START); - SET_GDT_ENTRIES(p, (entries*8)-1); - - return 0; - - fail: - while ( i-- > 0 ) - put_page_and_type(&frame_table[frames[i]]); - return -EINVAL; -} - - -long do_set_gdt(unsigned long *frame_list, unsigned int entries) -{ - int nr_pages = (entries + 511) / 512; - unsigned long frames[16]; - long ret; - - if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) ) - return -EINVAL; - - if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) ) - return -EFAULT; - - if ( (ret = set_gdt(current, frames, entries)) == 0 ) - { - local_flush_tlb(); - __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt)); - } - - return ret; -} - - -long do_update_descriptor( - unsigned long pa, unsigned long word1, unsigned long word2) -{ - unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT; - struct pfn_info *page; - long ret = -EINVAL; - - if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) ) - return -EINVAL; - - page = &frame_table[pfn]; - if ( unlikely(!get_page(page, current)) ) - goto out; - - /* Check if the given frame is in use in an unsafe context. 
*/ - switch ( page->type_and_flags & PGT_type_mask ) - { - case PGT_gdt_page: - /* Disallow updates of Xen-reserved descriptors in the current GDT. */ - if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) && - (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) && - (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) ) - goto out; - if ( unlikely(!get_page_type(page, PGT_gdt_page)) ) - goto out; - break; - case PGT_ldt_page: - if ( unlikely(!get_page_type(page, PGT_ldt_page)) ) - goto out; - break; - default: - if ( unlikely(!get_page_type(page, PGT_writeable_page)) ) - goto out; - break; - } - - /* All is good so make the update. */ - gdt_pent = map_domain_mem(pa); - gdt_pent[0] = word1; - gdt_pent[1] = word2; - unmap_domain_mem(gdt_pent); - - put_page_type(page); - - ret = 0; /* success */ - - out: - put_page(page); - return ret; -} - -#ifdef MEMORY_GUARD - -void *memguard_init(void *heap_start) -{ - l1_pgentry_t *l1; - int i, j; - - /* Round the allocation pointer up to a page boundary. */ - heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) & - PAGE_MASK); - - /* Memory guarding is incompatible with super pages. */ - for ( i = 0; i < (MAX_MONITOR_ADDRESS >> L2_PAGETABLE_SHIFT); i++ ) - { - l1 = (l1_pgentry_t *)heap_start; - heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE); - for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ ) - l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) | - (j << L1_PAGETABLE_SHIFT) | - __PAGE_HYPERVISOR); - idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] = - mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR); - } - - return heap_start; -} - -static void __memguard_change_range(void *p, unsigned long l, int guard) -{ - l1_pgentry_t *l1; - l2_pgentry_t *l2; - unsigned long _p = (unsigned long)p; - unsigned long _l = (unsigned long)l; - - /* Ensure we are dealing with a page-aligned whole number of pages. 
*/ - ASSERT((_p&PAGE_MASK) != 0); - ASSERT((_l&PAGE_MASK) != 0); - ASSERT((_p&~PAGE_MASK) == 0); - ASSERT((_l&~PAGE_MASK) == 0); - - while ( _l != 0 ) - { - l2 = &idle_pg_table[l2_table_offset(_p)]; - l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p); - if ( guard ) - *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT); - else - *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT); - _p += PAGE_SIZE; - _l -= PAGE_SIZE; - } -} - -void memguard_guard_range(void *p, unsigned long l) -{ - __memguard_change_range(p, l, 1); - local_flush_tlb(); -} - -void memguard_unguard_range(void *p, unsigned long l) -{ - __memguard_change_range(p, l, 0); -} - -int memguard_is_guarded(void *p) -{ - l1_pgentry_t *l1; - l2_pgentry_t *l2; - unsigned long _p = (unsigned long)p; - l2 = &idle_pg_table[l2_table_offset(_p)]; - l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p); - return !(l1_pgentry_val(*l1) & _PAGE_PRESENT); -} - -#endif diff --git a/xen/arch/i386/mpparse.c b/xen/arch/i386/mpparse.c deleted file mode 100644 index e11e962c9c..0000000000 --- a/xen/arch/i386/mpparse.c +++ /dev/null @@ -1,1381 +0,0 @@ -/* - * Intel Multiprocessor Specificiation 1.1 and 1.4 - * compliant MP-table parsing routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Fixes - * Erich Boleyn : MP v1.4 and additional changes. - * Alan Cox : Added EBDA scanning - * Ingo Molnar : various cleanups and rewrites - * Maciej W. Rozycki: Bits for default MP configurations - * Paul Diefenbaugh: Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int numnodes = 1; /* XXX Xen */ - -/* Have we found an MP table */ -int smp_found_config; - -/* - * Various Linux-internal data structures created from the - * MP-table. 
- */ -int apic_version [MAX_APICS]; -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -int mp_current_pci_id; -int *mp_bus_id_to_type; -int *mp_bus_id_to_node; -int *mp_bus_id_to_local; -int *mp_bus_id_to_pci_bus; -int max_mp_busses; -int max_irq_sources; - -/* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -struct mpc_config_intsrc *mp_irqs; - -/* MP IRQ source entries */ -int mp_irq_entries; - -int nr_ioapics; - -int pic_mode; -unsigned long mp_lapic_addr; - -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -unsigned int boot_cpu_logical_apicid = -1U; -/* Internal processor count */ -static unsigned int num_processors; - -/* Bitmask of physically existing CPUs */ -unsigned long phys_cpu_present_map; -unsigned long logical_cpu_present_map; - -#ifdef CONFIG_X86_CLUSTERED_APIC -unsigned char esr_disable = 0; -unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE; -unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC; -#endif -unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - -/* - * Intel MP BIOS table parsing routines: - */ - -#ifndef CONFIG_X86_VISWS_APIC -/* - * Checksum an MP configuration block. 
- */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -/* - * Processor encoding in an MP configuration block - */ - -static char __init *mpc_family(int family,int model) -{ - static char n[32]; - static char *model_defs[]= - { - "80486DX","80486DX", - "80486SX","80486DX/2 or 80487", - "80486SL","80486SX/2", - "Unknown","80486DX/2-WB", - "80486DX/4","80486DX/4-WB" - }; - - switch (family) { - case 0x04: - if (model < 10) - return model_defs[model]; - break; - - case 0x05: - return("Pentium(tm)"); - - case 0x06: - return("Pentium(tm) Pro"); - - case 0x0F: - if (model == 0x00) - return("Pentium 4(tm)"); - if (model == 0x01) - return("Pentium 4(tm)"); - if (model == 0x02) - return("Pentium 4(tm) XEON(tm)"); - if (model == 0x0F) - return("Special controller"); - } - sprintf(n,"Unknown CPU [%d:%d]",family, model); - return n; -} - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ - -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; - -void __init MP_processor_info (struct mpc_config_processor *m) -{ - int ver, quad, logical_apicid; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) - return; - - logical_apicid = m->mpc_apicid; - if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { - quad = translation_table[mpc_record]->trans_quad; - logical_apicid = (quad << 4) + - (m->mpc_apicid ? 
m->mpc_apicid << 1 : 1); - printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n", - m->mpc_apicid, - mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), - m->mpc_apicver, quad, logical_apicid); - } else { - printk("Processor #%d %s APIC version %d\n", - m->mpc_apicid, - mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), - m->mpc_apicver); - } - - if (m->mpc_featureflag&(1<<0)) - Dprintk(" Floating point unit present.\n"); - if (m->mpc_featureflag&(1<<7)) - Dprintk(" Machine Exception supported.\n"); - if (m->mpc_featureflag&(1<<8)) - Dprintk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag&(1<<9)) - Dprintk(" Internal APIC present.\n"); - if (m->mpc_featureflag&(1<<11)) - Dprintk(" SEP present.\n"); - if (m->mpc_featureflag&(1<<12)) - Dprintk(" MTRR present.\n"); - if (m->mpc_featureflag&(1<<13)) - Dprintk(" PGE present.\n"); - if (m->mpc_featureflag&(1<<14)) - Dprintk(" MCA present.\n"); - if (m->mpc_featureflag&(1<<15)) - Dprintk(" CMOV present.\n"); - if (m->mpc_featureflag&(1<<16)) - Dprintk(" PAT present.\n"); - if (m->mpc_featureflag&(1<<17)) - Dprintk(" PSE present.\n"); - if (m->mpc_featureflag&(1<<18)) - Dprintk(" PSN present.\n"); - if (m->mpc_featureflag&(1<<19)) - Dprintk(" Cache Line Flush Instruction present.\n"); - /* 20 Reserved */ - if (m->mpc_featureflag&(1<<21)) - Dprintk(" Debug Trace and EMON Store present.\n"); - if (m->mpc_featureflag&(1<<22)) - Dprintk(" ACPI Thermal Throttle Registers present.\n"); - if (m->mpc_featureflag&(1<<23)) - Dprintk(" MMX present.\n"); - if (m->mpc_featureflag&(1<<24)) - Dprintk(" FXSR present.\n"); - if (m->mpc_featureflag&(1<<25)) - Dprintk(" XMM present.\n"); - if (m->mpc_featureflag&(1<<26)) - Dprintk(" Willamette New Instructions present.\n"); - if (m->mpc_featureflag&(1<<27)) - Dprintk(" Self Snoop present.\n"); - if (m->mpc_featureflag&(1<<28)) - Dprintk(" HT present.\n"); - if 
(m->mpc_featureflag&(1<<29)) - Dprintk(" Thermal Monitor present.\n"); - /* 30, 31 Reserved */ - - - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - Dprintk(" Bootup CPU\n"); - boot_cpu_physical_apicid = m->mpc_apicid; - boot_cpu_logical_apicid = logical_apicid; - } - - if (num_processors >= NR_CPUS){ - printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot " - "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid); - return; - } - num_processors++; - - if (m->mpc_apicid > MAX_APICS) { - printk("Processor #%d INVALID. (Max ID: %d).\n", - m->mpc_apicid, MAX_APICS); - --num_processors; - return; - } - ver = m->mpc_apicver; - - logical_cpu_present_map |= 1 << (num_processors-1); - phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid); - - /* - * Validate version - */ - if (ver == 0x0) { - printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; - raw_phys_apicid[num_processors - 1] = m->mpc_apicid; -} - -static void __init MP_bus_info (struct mpc_config_bus *m) -{ - char str[7]; - int quad; - - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; - - if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { - quad = translation_table[mpc_record]->trans_quad; - mp_bus_id_to_node[m->mpc_busid] = quad; - mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local; - quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid; - printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad); - } else { - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); - } - - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - 
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else { - printk("Unknown bustype %s - ignoring\n", str); - } -} - -static void __init MP_ioapic_info (struct mpc_config_ioapic *m) -{ - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk("I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk("Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); - return; - } - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; -} - -static void __init MP_intsrc_info (struct mpc_config_intsrc *m) -{ - mp_irqs [mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries == max_irq_sources) - panic("Max # of irq sources exceeded!!\n"); -} - -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) -{ - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); - /* - * Well it seems all SMP boards in existence - * use ExtINT/LVT1 == LINT0 and - * NMI/LVT2 == LINT1 - the following check - * will show us if this assumptions is false. - * Until then we do not have to add baggage. 
- */ - if ((m->mpc_irqtype == mp_ExtINT) && - (m->mpc_destapiclint != 0)) - BUG(); - if ((m->mpc_irqtype == mp_NMI) && - (m->mpc_destapiclint != 1)) - BUG(); -} - -static void __init MP_translation_info (struct mpc_config_translation *m) -{ - printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk("MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - if (m->trans_quad+1 > numnodes) - numnodes = m->trans_quad+1; -} - -/* - * Read/parse the MPC oem tables - */ - -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ - unsigned short oemsize) -{ - int count = sizeof (*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable)+count; - - printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable); - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) - { - printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], - oemtable->oem_signature[1], - oemtable->oem_signature[2], - oemtable->oem_signature[3]); - return; - } - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) - { - printk("SMP oem mptable: checksum error!\n"); - return; - } - while (count < oemtable->oem_length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_config_translation *m= - (struct mpc_config_translation *)oemptr; - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - { - printk("Unrecognised OEM table entry type! 
- %d\n", (int) *oemptr); - return; - } - } - } -} - -/* - * Read/parse the MPC - */ - -static int __init smp_read_mpc(struct mp_config_table *mpc) -{ - char oem[16], prod[14]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; - int num_bus = 0; - int num_irq = 0; - unsigned char *bus_data; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { - panic("SMP mptable: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], - mpc->mpc_signature[1], - mpc->mpc_signature[2], - mpc->mpc_signature[3]); - return 0; - } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { - panic("SMP mptable: checksum error!\n"); - return 0; - } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", - mpc->mpc_spec); - return 0; - } - if (!mpc->mpc_lapic) { - printk(KERN_ERR "SMP mptable: null local APIC address!\n"); - return 0; - } - memcpy(oem,mpc->mpc_oem,8); - oem[8]=0; - printk("OEM ID: %s ",oem); - - memcpy(prod,mpc->mpc_productid,12); - prod[12]=0; - printk("Product ID: %s ",prod); - - detect_clustered_apic(oem, prod); - - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - - /* - * Save the local APIC address (it might be non-default) -- but only - * if we're not using ACPI. - */ - if (!acpi_lapic) - mp_lapic_addr = mpc->mpc_lapic; - - if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) { - /* We need to process the oem mpc tables to tell us which quad things are in ... 
*/ - mpc_record = 0; - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize); - mpc_record = 0; - } - - /* Pre-scan to determine the number of bus and - * interrupts records we have - */ - while (count < mpc->mpc_length) { - switch (*mpt) { - case MP_PROCESSOR: - mpt += sizeof(struct mpc_config_processor); - count += sizeof(struct mpc_config_processor); - break; - case MP_BUS: - ++num_bus; - mpt += sizeof(struct mpc_config_bus); - count += sizeof(struct mpc_config_bus); - break; - case MP_INTSRC: - ++num_irq; - mpt += sizeof(struct mpc_config_intsrc); - count += sizeof(struct mpc_config_intsrc); - break; - case MP_IOAPIC: - mpt += sizeof(struct mpc_config_ioapic); - count += sizeof(struct mpc_config_ioapic); - break; - case MP_LINTSRC: - mpt += sizeof(struct mpc_config_lintsrc); - count += sizeof(struct mpc_config_lintsrc); - break; - default: - count = mpc->mpc_length; - break; - } - } - /* - * Paranoia: Allocate one extra of both the number of busses and number - * of irqs, and make sure that we have at least 4 interrupts per PCI - * slot. But some machines do not report very many busses, so we need - * to fall back on the older defaults. 
- */ - ++num_bus; - max_mp_busses = max(num_bus, MAX_MP_BUSSES); - if (num_irq < (4 * max_mp_busses)) - num_irq = 4 * num_bus; /* 4 intr/PCI slot */ - ++num_irq; - max_irq_sources = max(num_irq, MAX_IRQ_SOURCES); - - count = (max_mp_busses * sizeof(int)) * 4; - count += (max_irq_sources * sizeof(struct mpc_config_intsrc)); - bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count)); - if (!bus_data) { - printk(KERN_ERR "SMP mptable: out of memory!\n"); - return 0; - } - mp_bus_id_to_type = (int *)&bus_data[0]; - mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))]; - mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2]; - mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3]; - mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4]; - memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int)); - - /* - * Now process the configuration blocks. - */ - count = sizeof(*mpc); - mpt = ((unsigned char *)mpc)+count; - while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - default: - { - count = 
mpc->mpc_length; - break; - } - } - ++mpc_record; - } - - if (clustered_apic_mode){ - phys_cpu_present_map = logical_cpu_present_map; - } - - - printk("Enabling APIC mode: "); - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) - printk("Clustered Logical. "); - else if(clustered_apic_mode == CLUSTERED_APIC_XAPIC) - printk("Physical. "); - else - printk("Flat. "); - printk("Using %d I/O APICs\n",nr_ioapics); - - if (!num_processors) - printk(KERN_ERR "SMP mptable: no processors registered!\n"); - return num_processors; -} - -static int __init ELCR_trigger(unsigned int irq) -{ - unsigned int port; - - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; -} - -static void __init construct_default_ioirq_mptable(int mpc_default_type) -{ - struct mpc_config_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ - intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; - - intsrc.mpc_irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk("ELCR contains invalid data... 
not using ELCR\n"); - else { - printk("Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.mpc_irqflag = 13; - else - intsrc.mpc_irqflag = 0; - } - - intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); - } - - intsrc.mpc_irqtype = mp_ExtINT; - intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); -} - -static inline void __init construct_default_ISA_mptable(int mpc_default_type) -{ - struct mpc_config_processor processor; - struct mpc_config_bus bus; - struct mpc_config_ioapic ioapic; - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - struct { - int mp_bus_id_to_type[MAX_MP_BUSSES]; - int mp_bus_id_to_node[MAX_MP_BUSSES]; - int mp_bus_id_to_local[MAX_MP_BUSSES]; - int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; - struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - } *bus_data; - - bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(sizeof(*bus_data))); - if (!bus_data) - panic("SMP mptable: out of memory!\n"); - mp_bus_id_to_type = bus_data->mp_bus_id_to_type; - mp_bus_id_to_node = bus_data->mp_bus_id_to_node; - mp_bus_id_to_local = bus_data->mp_bus_id_to_local; - mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus; - mp_irqs = bus_data->mp_irqs; - for (i = 0; i < MAX_MP_BUSSES; ++i) - mp_bus_id_to_pci_bus[i] = -1; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. 
- */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | - boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - switch (mpc_default_type) { - default: - printk("???\nUnknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.mpc_busid = 1; - memcpy(bus.mpc_bustype, "PCI ", 6); - MP_bus_info(&bus); - } - - ioapic.mpc_type = MP_IOAPIC; - ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.mpc_flags = MPC_APIC_USABLE; - ioapic.mpc_apicaddr = 0xFEC00000; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); - - lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ - lintsrc.mpc_srcbusid = 0; - lintsrc.mpc_srcbusirq = 0; - lintsrc.mpc_destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.mpc_irqtype = linttypes[i]; - lintsrc.mpc_destapiclint = i; - MP_lintsrc_info(&lintsrc); - } -} - -static struct intel_mp_floating *mpf_found; - -/* - * Scan the memory blocks for an SMP configuration block. 
- */ -void __init get_smp_config (void) -{ - struct intel_mp_floating *mpf = mpf_found; - - /* - * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_HT_ONLY). Note that - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. - */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); - return; - } - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); - - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2 & (1<<7)) { - printk(" IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(" Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } - - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - - printk("Default MP configuration #%d\n", mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); - - } else if (mpf->mpf_physptr) { - - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc((void *)mpf->mpf_physptr)) { - smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); - return; - } - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_config_bus bus; - - printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - memcpy(bus.mpc_bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } - - } else - BUG(); - - printk("Processors: %d\n", num_processors); - /* - * Only use the first configuration found. 
- */ -} - -static int __init smp_scan_config (unsigned long base, unsigned long length) -{ - unsigned long *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); - if (sizeof(*mpf) != 16) - printk("Error: MPF size\n"); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { - - smp_found_config = 1; - printk("found SMP MP-table at %08lx\n", - virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) - reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); - mpf_found = mpf; - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -void __init find_intel_smp (void) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) - return; - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! There were Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. Thankfully the bootloaders - * now honour the EBDA. 
- */ - - address = *(unsigned short *)phys_to_virt(0x40E); - address <<= 4; - smp_scan_config(address, 0x1000); -} - -#else - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesn't have a BIOS(-configuration table). - * No problem for Linux. - */ -void __init find_visws_smp(void) -{ - smp_found_config = 1; - - phys_cpu_present_map |= 2; /* or in id 1 */ - apic_version[1] |= 0x10; /* integrated APIC */ - apic_version[0] |= 0x10; - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -#endif - -/* - * - Intel MP Configuration Table - * - or SGI Visual Workstation configuration - */ -void __init find_smp_config (void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - find_intel_smp(); -#endif -#ifdef CONFIG_VISWS - find_visws_smp(); -#endif -} - - -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_BOOT - -void __init mp_register_lapic_address ( - u64 address) -{ - mp_lapic_addr = (unsigned long) address; - - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); -} - - -void __init mp_register_lapic ( - u8 id, - u8 enabled) -{ - struct mpc_config_processor processor; - int boot_cpu = 0; - - if (id >= MAX_APICS) { - printk(KERN_WARNING "Processor #%d invalid (max %d)\n", - id, MAX_APICS); - return; - } - - if (id == boot_cpu_physical_apicid) - boot_cpu = 1; - - processor.mpc_type = MP_PROCESSOR; - processor.mpc_apicid = id; - - /* - * mp_register_lapic_address() which is called before the - * current function does the fixmap of FIX_APIC_BASE. - * Read in the correct APIC version from there - */ - processor.mpc_apicver = apic_read(APIC_LVR); - - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); - processor.mpc_cpuflag |= (boot_cpu ? 
CPU_BOOTPROCESSOR : 0); - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - - MP_processor_info(&processor); -} - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) - -#define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 - -struct mp_ioapic_routing { - int apic_id; - int irq_start; - int irq_end; - u32 pin_programmed[4]; -} mp_ioapic_routing[MAX_IO_APICS]; - - -static int __init mp_find_ioapic ( - int irq) -{ - int i = 0; - - /* Find the IOAPIC that manages this IRQ. */ - for (i = 0; i < nr_ioapics; i++) { - if ((irq >= mp_ioapic_routing[i].irq_start) - && (irq <= mp_ioapic_routing[i].irq_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for IRQ %d\n", irq); - - return -1; -} - - -void __init mp_register_ioapic ( - u8 id, - u32 address, - u32 irq_base) -{ - int idx = 0; - - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in MADT table, skipping!\n"); - return; - } - - idx = nr_ioapics++; - - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); - - /* - * Build basic IRQ lookup table to facilitate irq->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI IRQs). 
- */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].irq_start = irq_base; - mp_ioapic_routing[idx].irq_end = irq_base + - io_apic_get_redir_entries(idx); - - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "IRQ %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].irq_start, - mp_ioapic_routing[idx].irq_end); - - return; -} - - -void __init mp_override_legacy_irq ( - u8 bus_irq, - u8 polarity, - u8 trigger, - u32 global_irq) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int found = 0; - int ioapic = -1; - int pin = -1; - - /* - * Convert 'global_irq' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(global_irq); - if (ioapic < 0) - return; - pin = global_irq - mp_ioapic_routing[ioapic].irq_start; - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (global_irq == 2) && (trigger == 3)) - trigger = 1; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - /* - * If an existing [IOAPIC.PIN -> IRQ] routing entry exists we override it. - * Otherwise create a new entry (e.g. global_irq == 2). 
- */ - for (i = 0; i < mp_irq_entries; i++) { - if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic) - && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) { - mp_irqs[i] = intsrc; - found = 1; - break; - } - } - if (!found) { - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); - } - - return; -} - - -void __init mp_config_acpi_legacy_irqs (void) -{ - int i = 0; - int ioapic = -1; - - /* - * Initialize mp_irqs for IRQ configuration. - */ - unsigned char *bus_data; - int count; - - count = (MAX_MP_BUSSES * sizeof(int)) * 4; - count += (MAX_IRQ_SOURCES * sizeof(int)) * 4; - bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count)); - if (!bus_data) { - panic("Fatal: can't allocate bus memory for ACPI legacy IRQ!"); - } - mp_bus_id_to_type = (int *)&bus_data[0]; - mp_bus_id_to_node = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int))]; - mp_bus_id_to_local = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 2]; - mp_bus_id_to_pci_bus = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 3]; - mp_irqs = (struct mpc_config_intsrc *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 4]; - for (i = 0; i < MAX_MP_BUSSES; ++i) - mp_bus_id_to_pci_bus[i] = -1; - - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; - - /* - * Use the default configuration for the IRQs 0-15. These may be - * overriden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - - if (i == 2) continue; /* Don't connect IRQ2 */ - - mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ - mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; - mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; - mp_irqs[mp_irq_entries].mpc_irqtype = i ? 
mp_INT : mp_ExtINT; /* 8259A to #0 */ - mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */ - mp_irqs[mp_irq_entries].mpc_dstirq = i; - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", - mp_irqs[mp_irq_entries].mpc_irqtype, - mp_irqs[mp_irq_entries].mpc_irqflag & 3, - (mp_irqs[mp_irq_entries].mpc_irqflag >> 2) & 3, - mp_irqs[mp_irq_entries].mpc_srcbus, - mp_irqs[mp_irq_entries].mpc_srcbusirq, - mp_irqs[mp_irq_entries].mpc_dstapic, - mp_irqs[mp_irq_entries].mpc_dstirq); - - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); - } -} - -/*extern FADT_DESCRIPTOR acpi_fadt;*/ - -void __init mp_config_ioapic_for_sci(int irq) -{ - int ioapic; - int ioapic_pin; - struct acpi_table_madt* madt; - struct acpi_table_int_src_ovr *entry = NULL; - acpi_interrupt_flags flags; - void *madt_end; - acpi_status status; - - /* - * Ensure that if there is an interrupt source override entry - * for the ACPI SCI, we leave it as is. Unfortunately this involves - * walking the MADT again. - */ - status = acpi_get_firmware_table("APIC", 1, ACPI_LOGICAL_ADDRESSING, - (struct acpi_table_header **) &madt); - if (ACPI_SUCCESS(status)) { - madt_end = (void *) (unsigned long)madt + madt->header.length; - - entry = (struct acpi_table_int_src_ovr *) - ((unsigned long) madt + sizeof(struct acpi_table_madt)); - - while ((void *) entry < madt_end) { - if (entry->header.type == ACPI_MADT_INT_SRC_OVR && - acpi_fadt.sci_int == entry->bus_irq) - goto found; - - entry = (struct acpi_table_int_src_ovr *) - ((unsigned long) entry + entry->header.length); - } - } - /* - * Although the ACPI spec says that the SCI should be level/low - * don't reprogram it unless there is an explicit MADT OVR entry - * instructing us to do so -- otherwise we break Tyan boards which - * have the SCI wired edge/high but no MADT OVR. - */ - return; - -found: - /* - * See the note at the end of ACPI 2.0b section - * 5.2.10.8 for what this is about. 
- */ - flags = entry->flags; - acpi_fadt.sci_int = entry->global_irq; - irq = entry->global_irq; - - ioapic = mp_find_ioapic(irq); - - ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; - - /* - * MPS INTI flags: - * trigger: 0=default, 1=edge, 3=level - * polarity: 0=default, 1=high, 3=low - * Per ACPI spec, default for SCI means level/low. - */ - io_apic_set_pci_routing(ioapic, ioapic_pin, irq, - (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1)); -} - - -#ifdef CONFIG_ACPI_PCI - -void __init mp_parse_prt (void) -{ - struct list_head *node = NULL; - struct acpi_prt_entry *entry = NULL; - int ioapic = -1; - int ioapic_pin = 0; - int irq = 0; - int idx, bit = 0; - int edge_level = 0; - int active_high_low = 0; - - /* - * Parsing through the PCI Interrupt Routing Table (PRT) and program - * routing for all entries. - */ - list_for_each(node, &acpi_prt.entries) { - entry = list_entry(node, struct acpi_prt_entry, node); - - /* Need to get irq for dynamic entry */ - if (entry->link.handle) { - irq = acpi_pci_link_get_irq(entry->link.handle, entry->link.index, &edge_level, &active_high_low); - if (!irq) - continue; - } - else { - /* Hardwired IRQ. Assume PCI standard settings */ - irq = entry->link.index; - edge_level = 1; - active_high_low = 1; - } - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_fadt.sci_int == irq) { - entry->irq = irq; /*we still need to set entry's irq*/ - continue; - } - - ioapic = mp_find_ioapic(irq); - if (ioapic < 0) - continue; - ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; - - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->irq mappings (but unique PCI devices); - * we only only program the IOAPIC on the first. - */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); - if (idx > 3) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - continue; - } - if ((1<irq = irq; - continue; - } - - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<irq = irq; - - printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n", - entry->id.segment, entry->id.bus, - entry->id.device, ('A' + entry->pin), - mp_ioapic_routing[ioapic].apic_id, ioapic_pin, - entry->irq); - } - - print_IO_APIC(); - - return; -} - -#endif /*CONFIG_ACPI_PCI*/ - -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ - -#endif /*CONFIG_ACPI*/ diff --git a/xen/arch/i386/nmi.c b/xen/arch/i386/nmi.c deleted file mode 100644 index 8422367492..0000000000 --- a/xen/arch/i386/nmi.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * linux/arch/i386/nmi.c - * - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. 
- * Keir Fraser : Pentium 4 Hyperthreading support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -unsigned int nmi_watchdog = NMI_NONE; -unsigned int watchdog_on = 0; -static unsigned int nmi_hz = HZ; -unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ -extern void show_registers(struct pt_regs *regs); - -extern int logical_proc_id[]; - -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING - -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED - -#define MSR_P4_MISC_ENABLE 0x1A0 -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) -#define MSR_P4_PERFCTR0 0x300 -#define MSR_P4_CCCR0 0x360 -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS0 (1<<3) -#define P4_ESCR_USR0 (1<<2) -#define P4_ESCR_OS1 (1<<1) -#define P4_ESCR_USR1 (1<<0) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -/* - * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter - * CRU_ESCR0 (with any non-null event selector) through a complemented - * max threshold. [IA32-Vol3, Section 14.9.9] - */ -#define MSR_P4_IQ_COUNTER0 0x30C -#define MSR_P4_IQ_COUNTER1 0x30D -#define MSR_P4_IQ_CCCR0 0x36C -#define MSR_P4_IQ_CCCR1 0x36D -#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 
4 */ -#define P4_NMI_CRU_ESCR0 \ - (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \ - P4_ESCR_OS1|P4_ESCR_USR1) -#define P4_NMI_IQ_CCCR0 \ - (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ - P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) -#define P4_NMI_IQ_CCCR1 \ - (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ - P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) - -int __init check_nmi_watchdog (void) -{ - unsigned int prev_nmi_count[NR_CPUS]; - int j, cpu; - - if ( !nmi_watchdog ) - return 0; - - printk("Testing NMI watchdog --- "); - - for ( j = 0; j < smp_num_cpus; j++ ) - { - cpu = cpu_logical_map(j); - prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count; - } - sti(); - mdelay((10*1000)/nmi_hz); /* wait 10 ticks */ - - for ( j = 0; j < smp_num_cpus; j++ ) - { - cpu = cpu_logical_map(j); - if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 ) - printk("CPU#%d stuck. ", cpu); - else - printk("CPU#%d okay. ", cpu); - } - - printk("\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if ( nmi_watchdog == NMI_LOCAL_APIC ) - nmi_hz = 1; - - return 0; -} - -static inline void nmi_pm_init(void) { } -#define __pminit __init - -/* - * Activate the NMI watchdog via the local APIC. - * Original code written by Keith Owens. 
- */ - -static void __pminit clear_msr_range(unsigned int base, unsigned int n) -{ - unsigned int i; - for ( i = 0; i < n; i++ ) - wrmsr(base+i, 0, 0); -} - -static void __pminit setup_k7_watchdog(void) -{ - unsigned int evntsel; - - nmi_perfctr_msr = MSR_K7_PERFCTR0; - - clear_msr_range(MSR_K7_EVNTSEL0, 4); - clear_msr_range(MSR_K7_PERFCTR0, 4); - - evntsel = K7_EVNTSEL_INT - | K7_EVNTSEL_OS - | K7_EVNTSEL_USR - | K7_NMI_EVENT; - - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); -} - -static void __pminit setup_p6_watchdog(void) -{ - unsigned int evntsel; - - nmi_perfctr_msr = MSR_P6_PERFCTR0; - - clear_msr_range(MSR_P6_EVNTSEL0, 2); - clear_msr_range(MSR_P6_PERFCTR0, 2); - - evntsel = P6_EVNTSEL_INT - | P6_EVNTSEL_OS - | P6_EVNTSEL_USR - | P6_NMI_EVENT; - - wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); -} - -static int __pminit setup_p4_watchdog(void) -{ - unsigned int misc_enable, dummy; - - rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); - if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) - return 0; - - nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; - - if ( logical_proc_id[smp_processor_id()] == 0 ) - { - if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) - clear_msr_range(0x3F1, 2); - /* MSR 0x3F0 seems to have a default value of 0xFC00, but current - docs doesn't fully define it, so leave it alone for now. 
*/ - clear_msr_range(0x3A0, 31); - clear_msr_range(0x3C0, 6); - clear_msr_range(0x3C8, 6); - clear_msr_range(0x3E0, 2); - clear_msr_range(MSR_P4_CCCR0, 18); - clear_msr_range(MSR_P4_PERFCTR0, 18); - - wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); - apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); - } - else if ( logical_proc_id[smp_processor_id()] == 1 ) - { - wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1); - apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0); - } - else - { - return 0; - } - - return 1; -} - -void __pminit setup_apic_nmi_watchdog(void) -{ - if (!nmi_watchdog) - return; - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) - return; - setup_k7_watchdog(); - break; - case X86_VENDOR_INTEL: - switch (boot_cpu_data.x86) { - case 6: - setup_p6_watchdog(); - break; - case 15: - if (!setup_p4_watchdog()) - return; - break; - default: - return; - } - break; - default: - return; - } - nmi_pm_init(); -} - - -static unsigned int -last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; - -void touch_nmi_watchdog (void) -{ - int i; - for (i = 0; i < smp_num_cpus; i++) - alert_counter[i] = 0; -} - -void nmi_watchdog_tick (struct pt_regs * regs) -{ - extern spinlock_t console_lock; - extern void die(const char * str, struct pt_regs * regs, long err); - - int sum, cpu = smp_processor_id(); - - sum = apic_timer_irqs[cpu]; - - if ( (last_irq_sums[cpu] == sum) && watchdog_on ) - { - /* - * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds) - * before doing the oops ... 
- */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) { - console_lock = SPIN_LOCK_UNLOCKED; - die("NMI Watchdog detected LOCKUP on CPU", regs, cpu); - } - } - else - { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; - } - - if ( nmi_perfctr_msr ) - { - if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 ) - { - if ( logical_proc_id[cpu] == 0 ) - { - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); - apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); - } - else - { - wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0); - apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1); - } - } - else - { - wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); - } - } -} diff --git a/xen/arch/i386/pci-dma.c b/xen/arch/i386/pci-dma.c deleted file mode 100644 index dd088fa1df..0000000000 --- a/xen/arch/i386/pci-dma.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. 
- */ - -#include -#include -#include -#include -#include - -void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - void *ret; - int gfp = GFP_ATOMIC; - - if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - } - return ret; -} - -void pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} diff --git a/xen/arch/i386/pci-i386.c b/xen/arch/i386/pci-i386.c deleted file mode 100644 index 6a5f672c48..0000000000 --- a/xen/arch/i386/pci-i386.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines - * - * Copyright 1993, 1994 Drew Eckhardt - * Visionary Computing - * (Unix and Linux consulting and custom programming) - * Drew@Colorado.EDU - * +1 (303) 786-7975 - * - * Drew's work was sponsored by: - * iX Multiuser Multitasking Magazine - * Hannover, Germany - * hm@ix.de - * - * Copyright 1997--2000 Martin Mares - * - * For more information, please consult the following manuals (look at - * http://www.pcisig.com/ for how to get them): - * - * PCI BIOS Specification - * PCI Local Bus Specification - * PCI to PCI Bridge Specification - * PCI System Design Guide - * - * - * CHANGELOG : - * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION - * Revision 2.0 present on 's ASUS mainboard. - * - * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic - * Potter, potter@cao-vlsi.ibp.fr - * - * Jan 10, 1995 : Modified to store the information about configured pci - * devices into a list, which can be accessed via /proc/pci by - * Curtis Varner, cvarner@cs.ucr.edu - * - * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter. - * Alpha version. Intel & UMC chipset support only. 
- * - * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code - * moved to drivers/pci/pci.c. - * - * Dec 7, 1996 : Added support for direct configuration access of boards - * with Intel compatible access schemes (tsbogend@alpha.franken.de) - * - * Feb 3, 1997 : Set internal functions to static, save/restore flags - * avoid dead locks reading broken PCI BIOS, werner@suse.de - * - * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS - * (mj@atrey.karlin.mff.cuni.cz) - * - * May 7, 1997 : Added some missing cli()'s. [mj] - * - * Jun 20, 1997 : Corrected problems in "conf1" type accesses. - * (paubert@iram.es) - * - * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts - * and cleaned it up... Martin Mares - * - * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj] - * - * May 1, 1998 : Support for peer host bridges. [mj] - * - * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space - * can be accessed from interrupts even on SMP systems. [mj] - * - * August 1998 : Better support for peer host bridges and more paranoid - * checks for direct hardware access. Ugh, this file starts to look as - * a large gallery of common hardware bug workarounds (watch the comments) - * -- the PCI specs themselves are sane, but most implementors should be - * hit hard with \hammer scaled \magstep5. [mj] - * - * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj] - * - * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj] - * - * August 1999 : New resource management and configuration access stuff. [mj] - * - * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges. - * Based on ideas by Chris Frantz and David Hinds. [mj] - * - * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi - * for a lot of patience during testing. [mj] - * - * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. 
[mj] - */ - -#include -#include -#include -#include -#include -#include - -#include "pci-i386.h" - -void -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) -{ - u32 new, check; - int reg; - - new = res->start | (res->flags & PCI_REGION_FLAG_MASK); - if (resource < 6) { - reg = PCI_BASE_ADDRESS_0 + 4*resource; - } else if (resource == PCI_ROM_RESOURCE) { - res->flags |= PCI_ROM_ADDRESS_ENABLE; - new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* Somebody might have asked allocation of a non-standard resource */ - return; - } - - pci_write_config_dword(dev, reg, new); - pci_read_config_dword(dev, reg, &check); - if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { - printk(KERN_ERR "PCI: Error while updating region " - "%s/%d (%08x != %08x)\n", dev->slot_name, resource, - new, check); - } -} - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - * - * Why? Because some silly external IO cards only decode - * the low 10 bits of the IO address. The 0x00-0xff region - * is reserved for motherboard devices that decode all 16 - * bits, so it's ok to allocate at, say, 0x2800-0x28ff, - * but we want to try to avoid allocating at 0x2900-0x2bff - * which might have be mirrored at 0x0100-0x03ff.. - */ -void -pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) -{ - if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; - - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} - - -/* - * Handle resources of PCI devices. If the world were perfect, we could - * just allocate all the resource regions and do nothing more. It isn't. 
- * On the other hand, we cannot just re-allocate all devices, as it would - * require us to know lots of host bridge internals. So we attempt to - * keep as much of the original configuration as possible, but tweak it - * when it's found to be wrong. - * - * Known BIOS problems we have to work around: - * - I/O or memory regions not configured - * - regions configured, but not enabled in the command register - * - bogus I/O addresses above 64K used - * - expansion ROMs left enabled (this may sound harmless, but given - * the fact the PCI specs explicitly allow address decoders to be - * shared between expansion ROMs and other resource regions, it's - * at least dangerous) - * - * Our solution: - * (1) Allocate resources for all buses behind PCI-to-PCI bridges. - * This gives us fixed barriers on where we can allocate. - * (2) Allocate resources for all enabled devices. If there is - * a collision, just mark the resource as unallocated. Also - * disable expansion ROMs during this step. - * (3) Try to allocate resources for disabled devices. If the - * resources were assigned correctly, everything goes well, - * if they weren't, they won't disturb allocation of other - * resources. - * (4) Assign new addresses to resources which were either - * not configured at all or misconfigured. If explicitly - * requested by the user, configure expansion ROM address - * as well. 
- */ - -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) -{ - struct list_head *ln; - struct pci_bus *bus; - struct pci_dev *dev; - int idx; - struct resource *r, *pr; - - /* Depth-First Search on bus tree */ - for (ln=bus_list->next; ln != bus_list; ln=ln->next) { - bus = pci_bus_b(ln); - if ((dev = bus->self)) { - for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->start) - continue; - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) - printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name); - } - } - pcibios_allocate_bus_resources(&bus->children); - } -} - -static void __init pcibios_allocate_resources(int pass) -{ - struct pci_dev *dev; - int idx, disabled; - u16 command; - struct resource *r, *pr; - - pci_for_each_dev(dev) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for(idx = 0; idx < 6; idx++) { - r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ - continue; - if (!r->start) /* Address not assigned at all */ - continue; - if (r->flags & IORESOURCE_IO) - disabled = !(command & PCI_COMMAND_IO); - else - disabled = !(command & PCI_COMMAND_MEMORY); - if (pass == disabled) { - DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name); - /* We'll assign a new address later */ - r->end -= r->start; - r->start = 0; - } - } - } - if (!pass) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags & PCI_ROM_ADDRESS_ENABLE) { - /* Turn the ROM off, leave the resource region, but keep it unregistered. 
*/ - u32 reg; - DBG("PCI: Switching off ROM of %s\n", dev->slot_name); - r->flags &= ~PCI_ROM_ADDRESS_ENABLE; - pci_read_config_dword(dev, dev->rom_base_reg, ®); - pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); - } - } - } -} - -static void __init pcibios_assign_resources(void) -{ - struct pci_dev *dev; - int idx; - struct resource *r; - - pci_for_each_dev(dev) { - int class = dev->class >> 8; - - /* Don't touch classless devices and host bridges */ - if (!class || class == PCI_CLASS_BRIDGE_HOST) - continue; - - for(idx=0; idx<6; idx++) { - r = &dev->resource[idx]; - - /* - * Don't touch IDE controllers and I/O ports of video cards! - */ - if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || - (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) - continue; - - /* - * We shall assign a new address to this resource, either because - * the BIOS forgot to do so or because we have decided the old - * address was unusable for some reason. - */ - if (!r->start && r->end) - pci_assign_resource(dev, idx); - } - - if (pci_probe & PCI_ASSIGN_ROMS) { - r = &dev->resource[PCI_ROM_RESOURCE]; - r->end -= r->start; - r->start = 0; - if (r->end) - pci_assign_resource(dev, PCI_ROM_RESOURCE); - } - } -} - -void __init pcibios_set_cacheline_size(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ -} - -void __init pcibios_resource_survey(void) -{ - DBG("PCI: Allocating resources\n"); - pcibios_allocate_bus_resources(&pci_root_buses); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - pcibios_assign_resources(); -} - -int pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = 
cmd; - for(idx=0; idx<6; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1<resource[idx]; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check the latency - * timer as certain crappy BIOSes forget to set it properly. - */ -unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -#if 0 -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - unsigned long prot; - - /* I/O space cannot be accessed via normal processor loads and - * stores on this platform. - */ - if (mmap_state == pci_mmap_io) - return -EINVAL; - - /* Leave vm_pgoff as-is, the PCI space address is the physical - * address on this platform. - */ - vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO); - - prot = pgprot_val(vma->vm_page_prot); - if (boot_cpu_data.x86 > 3) - prot |= _PAGE_PCD | _PAGE_PWT; - vma->vm_page_prot = __pgprot(prot); - - /* Write-combine setting is ignored, it is changed via the mtrr - * interfaces on this platform. 
- */ - if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} -#endif diff --git a/xen/arch/i386/pci-i386.h b/xen/arch/i386/pci-i386.h deleted file mode 100644 index fe70b10166..0000000000 --- a/xen/arch/i386/pci-i386.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines. - * - * (c) 1999 Martin Mares - */ - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -#define PCI_PROBE_BIOS 0x0001 -#define PCI_PROBE_CONF1 0x0002 -#define PCI_PROBE_CONF2 0x0004 -#define PCI_NO_SORT 0x0100 -#define PCI_BIOS_SORT 0x0200 -#define PCI_NO_CHECKS 0x0400 -#define PCI_ASSIGN_ROMS 0x1000 -#define PCI_BIOS_IRQ_SCAN 0x2000 -#define PCI_ASSIGN_ALL_BUSSES 0x4000 - -extern unsigned int pci_probe; - -/* pci-i386.c */ - -extern unsigned int pcibios_max_latency; -extern u8 pci_cache_line_size; - -void pcibios_resource_survey(void); -void pcibios_set_cacheline_size(void); -int pcibios_enable_resources(struct pci_dev *, int); - -/* pci-pc.c */ - -extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; -extern struct pci_ops *pci_root_ops; - -/* pci-irq.c */ - -struct irq_info { - u8 bus, devfn; /* Bus, device and function */ - struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ - u16 bitmap; /* Available IRQs */ - } __attribute__((packed)) irq[4]; - u8 slot; /* Slot number, 0=onboard */ - u8 rfu; -} __attribute__((packed)); - -struct irq_routing_table { - u32 signature; /* PIRQ_SIGNATURE should be here */ - u16 version; /* PIRQ_VERSION */ - u16 size; /* Table size in bytes */ - u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ - u32 miniport_data; /* Crap */ - u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ - struct irq_info 
slots[0]; -} __attribute__((packed)); - -extern unsigned int pcibios_irq_mask; - -void pcibios_irq_init(void); -void pcibios_fixup_irqs(void); -void pcibios_enable_irq(struct pci_dev *dev); diff --git a/xen/arch/i386/pci-irq.c b/xen/arch/i386/pci-irq.c deleted file mode 100644 index 87e93f12d1..0000000000 --- a/xen/arch/i386/pci-irq.c +++ /dev/null @@ -1,1092 +0,0 @@ -/* - * Low-Level PCI Support for PC -- Routing of Interrupts - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "pci-i386.h" - -#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) -#define PIRQ_VERSION 0x0100 - -int broken_hp_bios_irq9; - -static struct irq_routing_table *pirq_table; - -/* - * Never use: 0, 1, 2 (timer, keyboard, and cascade) - * Avoid using: 13, 14 and 15 (FP error and IDE). - * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) - */ -unsigned int pcibios_irq_mask = 0xfff8; - -static int pirq_penalty[16] = { - 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, - 0, 0, 0, 0, 1000, 100000, 100000, 100000 -}; - -struct irq_router { - char *name; - u16 vendor, device; - int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); - int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); -}; - -struct irq_router_handler { - u16 vendor; - int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); -}; - -/* - * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. 
- */ - -static struct irq_routing_table * __init pirq_find_routing_table(void) -{ - u8 *addr; - struct irq_routing_table *rt; - int i; - u8 sum; - - for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { - rt = (struct irq_routing_table *) addr; - if (rt->signature != PIRQ_SIGNATURE || - rt->version != PIRQ_VERSION || - rt->size % 16 || - rt->size < sizeof(struct irq_routing_table)) - continue; - sum = 0; - for(i=0; isize; i++) - sum += addr[i]; - if (!sum) { - DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt); - return rt; - } - } - return NULL; -} - -/* - * If we have a IRQ routing table, use it to search for peer host - * bridges. It's a gross hack, but since there are no other known - * ways how to get a list of buses, we have to go this way. - */ - -static void __init pirq_peer_trick(void) -{ - struct irq_routing_table *rt = pirq_table; - u8 busmap[256]; - int i; - struct irq_info *e; - - memset(busmap, 0, sizeof(busmap)); - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { - e = &rt->slots[i]; -#ifdef DEBUG - { - int j; - DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); - for(j=0; j<4; j++) - DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); - DBG("\n"); - } -#endif - busmap[e->bus] = 1; - } - for(i=1; i<256; i++) - /* - * It might be a secondary bus, but in this case its parent is already - * known (ascending bus order) and therefore pci_scan_bus returns immediately. - */ - if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL)) - printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); - pcibios_last_bus = -1; -} - -/* - * Code for querying and setting of IRQ routes on various interrupt routers. 
- */ - -void eisa_set_level_irq(unsigned int irq) -{ - unsigned char mask = 1 << (irq & 7); - unsigned int port = 0x4d0 + (irq >> 3); - unsigned char val = inb(port); - - if (!(val & mask)) { - DBG(" -> edge"); - outb(val | mask, port); - } -} - -/* - * Common IRQ routing practice: nybbles in config space, - * offset by some magic constant. - */ -static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) -{ - u8 x; - unsigned reg = offset + (nr >> 1); - - pci_read_config_byte(router, reg, &x); - return (nr & 1) ? (x >> 4) : (x & 0xf); -} - -static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) -{ - u8 x; - unsigned reg = offset + (nr >> 1); - - pci_read_config_byte(router, reg, &x); - x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); - pci_write_config_byte(router, reg, x); -} - -/* - * ALI pirq entries are damn ugly, and completely undocumented. - * This has been figured out from pirq tables, and it's not a pretty - * picture. - */ -static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; - - return irqmap[read_config_nybble(router, 0x48, pirq-1)]; -} - -static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; - unsigned int val = irqmap[irq]; - - if (val) { - write_config_nybble(router, 0x48, pirq-1, val); - return 1; - } - return 0; -} - -/* - * The Intel PIIX4 pirq rules are fairly simple: "pirq" is - * just a pointer to the config space. - */ -static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 x; - - pci_read_config_byte(router, pirq, &x); - return (x < 16) ? 
x : 0; -} - -static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - pci_write_config_byte(router, pirq, irq); - return 1; -} - -/* - * The VIA pirq rules are nibble-based, like ALI, - * but without the ugly irq number munging. - * However, PIRQD is in the upper instead of lower nibble. - */ -static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); -} - -static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); - return 1; -} - -/* - * ITE 8330G pirq rules are nibble-based - * FIXME: pirqmap may be { 1, 0, 3, 2 }, - * 2+3 are both mapped to irq 9 on my system - */ -static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - static unsigned char pirqmap[4] = { 1, 0, 2, 3 }; - return read_config_nybble(router,0x43, pirqmap[pirq-1]); -} - -static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - static unsigned char pirqmap[4] = { 1, 0, 2, 3 }; - write_config_nybble(router, 0x43, pirqmap[pirq-1], irq); - return 1; -} - -/* - * OPTI: high four bits are nibble pointer.. - * I wonder what the low bits do? 
- */ -static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0xb8, pirq >> 4); -} - -static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0xb8, pirq >> 4, irq); - return 1; -} - -/* - * Cyrix: nibble offset 0x5C - */ -static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0x5C, (pirq-1)^1); -} - -static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0x5C, (pirq-1)^1, irq); - return 1; -} - -/* - * PIRQ routing for SiS 85C503 router used in several SiS chipsets. - * We have to deal with the following issues here: - * - vendors have different ideas about the meaning of link values - * - some onboard devices (integrated in the chipset) have special - * links and are thus routed differently (i.e. not via PCI INTA-INTD) - * - different revision of the router have a different layout for - * the routing registers, particularly for the onchip devices - * - * For all routing registers the common thing is we have one byte - * per routeable link which is defined as: - * bit 7 IRQ mapping enabled (0) or disabled (1) - * bits [6:4] reserved (sometimes used for onchip devices) - * bits [3:0] IRQ to map to - * allowed: 3-7, 9-12, 14-15 - * reserved: 0, 1, 2, 8, 13 - * - * The config-space registers located at 0x41/0x42/0x43/0x44 are - * always used to route the normal PCI INT A/B/C/D respectively. - * Apparently there are systems implementing PCI routing table using - * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. - * We try our best to handle both link mappings. - * - * Currently (2003-05-21) it appears most SiS chipsets follow the - * definition of routing registers from the SiS-5595 southbridge. 
- * According to the SiS 5595 datasheets the revision id's of the - * router (ISA-bridge) should be 0x01 or 0xb0. - * - * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1. - * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets. - * They seem to work with the current routing code. However there is - * some concern because of the two USB-OHCI HCs (original SiS 5595 - * had only one). YMMV. - * - * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1: - * - * 0x61: IDEIRQ: - * bits [6:5] must be written 01 - * bit 4 channel-select primary (0), secondary (1) - * - * 0x62: USBIRQ: - * bit 6 OHCI function disabled (0), enabled (1) - * - * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved - * - * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved - * - * We support USBIRQ (in addition to INTA-INTD) and keep the - * IDE, ACPI and DAQ routing untouched as set by the BIOS. - * - * Currently the only reported exception is the new SiS 65x chipset - * which includes the SiS 69x southbridge. Here we have the 85C503 - * router revision 0x04 and there are changes in the register layout - * mostly related to the different USB HCs with USB 2.0 support. 
- * - * Onchip routing for router rev-id 0x04 (try-and-error observation) - * - * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs - * bit 6-4 are probably unused, not like 5595 - */ - -#define PIRQ_SIS_IRQ_MASK 0x0f -#define PIRQ_SIS_IRQ_DISABLE 0x80 -#define PIRQ_SIS_USB_ENABLE 0x40 -#define PIRQ_SIS_DETECT_REGISTER 0x40 - -/* return value: - * -1 on error - * 0 for PCI INTA-INTD - * 0 or enable bit mask to check or set for onchip functions - */ -static inline int pirq_sis5595_onchip(int pirq, int *reg) -{ - int ret = -1; - - *reg = pirq; - switch(pirq) { - case 0x01: - case 0x02: - case 0x03: - case 0x04: - *reg += 0x40; - case 0x41: - case 0x42: - case 0x43: - case 0x44: - ret = 0; - break; - - case 0x62: - ret = PIRQ_SIS_USB_ENABLE; /* documented for 5595 */ - break; - - case 0x61: - case 0x6a: - case 0x7e: - printk(KERN_INFO "SiS pirq: IDE/ACPI/DAQ mapping not implemented: (%u)\n", - (unsigned) pirq); - /* fall thru */ - default: - printk(KERN_INFO "SiS router unknown request: (%u)\n", - (unsigned) pirq); - break; - } - return ret; -} - -/* return value: - * -1 on error - * 0 for PCI INTA-INTD - * 0 or enable bit mask to check or set for onchip functions - */ -static inline int pirq_sis96x_onchip(int pirq, int *reg) -{ - int ret = -1; - - *reg = pirq; - switch(pirq) { - case 0x01: - case 0x02: - case 0x03: - case 0x04: - *reg += 0x40; - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - ret = 0; - break; - - default: - printk(KERN_INFO "SiS router unknown request: (%u)\n", - (unsigned) pirq); - break; - } - return ret; -} - - -static int pirq_sis5595_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 x; - int reg, check; - - check = pirq_sis5595_onchip(pirq, ®); - if (check < 0) - return 0; - - pci_read_config_byte(router, reg, &x); - if (check != 0 && !(x & check)) - return 0; - - return (x & PIRQ_SIS_IRQ_DISABLE) ? 
0 : (x & PIRQ_SIS_IRQ_MASK); -} - -static int pirq_sis96x_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 x; - int reg, check; - - check = pirq_sis96x_onchip(pirq, ®); - if (check < 0) - return 0; - - pci_read_config_byte(router, reg, &x); - if (check != 0 && !(x & check)) - return 0; - - return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK); -} - -static int pirq_sis5595_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - u8 x; - int reg, set; - - set = pirq_sis5595_onchip(pirq, ®); - if (set < 0) - return 0; - - x = (irq & PIRQ_SIS_IRQ_MASK); - if (x == 0) - x = PIRQ_SIS_IRQ_DISABLE; - else - x |= set; - - pci_write_config_byte(router, reg, x); - - return 1; -} - -static int pirq_sis96x_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - u8 x; - int reg, set; - - set = pirq_sis96x_onchip(pirq, ®); - if (set < 0) - return 0; - - x = (irq & PIRQ_SIS_IRQ_MASK); - if (x == 0) - x = PIRQ_SIS_IRQ_DISABLE; - else - x |= set; - - pci_write_config_byte(router, reg, x); - - return 1; -} - - -/* - * VLSI: nibble offset 0x74 - educated guess due to routing table and - * config space of VLSI 82C534 PCI-bridge/router (1004:0102) - * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard - * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6 - * for the busbridge to the docking station. 
- */ - -static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); - return 0; - } - return read_config_nybble(router, 0x74, pirq-1); -} - -static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); - return 0; - } - write_config_nybble(router, 0x74, pirq-1, irq); - return 1; -} - -/* - * ServerWorks: PCI interrupts mapped to system IRQ lines through Index - * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register - * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect - * register is a straight binary coding of desired PIC IRQ (low nibble). - * - * The 'link' value in the PIRQ table is already in the correct format - * for the Index register. There are some special index values: - * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, - * and 0x03 for SMBus. - */ -static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - outb_p(pirq, 0xc00); - return inb(0xc01) & 0xf; -} - -static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - outb_p(pirq, 0xc00); - outb_p(irq, 0xc01); - return 1; -} - -/* Support for AMD756 PCI IRQ Routing - * Jhon H. Caicedo - * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... 
(jhcaiced) - * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) - * The AMD756 pirq rules are nibble-based - * offset 0x56 0-3 PIRQA 4-7 PIRQB - * offset 0x57 0-3 PIRQC 4-7 PIRQD - */ -static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 irq; - irq = 0; - if (pirq <= 4) - { - irq = read_config_nybble(router, 0x56, pirq - 1); - } - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", - dev->vendor, dev->device, pirq, irq); - return irq; -} - -static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", - dev->vendor, dev->device, pirq, irq); - if (pirq <= 4) - { - write_config_nybble(router, 0x56, pirq - 1, irq); - } - return 1; -} - -#ifdef CONFIG_PCI_BIOS - -static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - struct pci_dev *bridge; - int pin = pci_get_interrupt_pin(dev, &bridge); - return pcibios_set_irq_routing(bridge, pin, irq); -} - -#endif - - -static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - /* We must not touch 440GX even if we have tables. 
440GX has - different IRQ routing weirdness */ - if(pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0, NULL) || - pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2, NULL)) - return 0; - switch(device) - { - case PCI_DEVICE_ID_INTEL_82371FB_0: - case PCI_DEVICE_ID_INTEL_82371SB_0: - case PCI_DEVICE_ID_INTEL_82371AB_0: - case PCI_DEVICE_ID_INTEL_82371MX: - case PCI_DEVICE_ID_INTEL_82443MX_0: - case PCI_DEVICE_ID_INTEL_82801AA_0: - case PCI_DEVICE_ID_INTEL_82801AB_0: - case PCI_DEVICE_ID_INTEL_82801BA_0: - case PCI_DEVICE_ID_INTEL_82801BA_10: - case PCI_DEVICE_ID_INTEL_82801CA_0: - case PCI_DEVICE_ID_INTEL_82801CA_12: - case PCI_DEVICE_ID_INTEL_82801DB_0: - case PCI_DEVICE_ID_INTEL_82801E_0: - case PCI_DEVICE_ID_INTEL_82801EB_0: - case PCI_DEVICE_ID_INTEL_ESB_0: - r->name = "PIIX/ICH"; - r->get = pirq_piix_get; - r->set = pirq_piix_set; - return 1; - } - return 0; -} - -static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - /* FIXME: We should move some of the quirk fixup stuff here */ - switch(device) - { - case PCI_DEVICE_ID_VIA_82C586_0: - case PCI_DEVICE_ID_VIA_82C596: - case PCI_DEVICE_ID_VIA_82C686: - case PCI_DEVICE_ID_VIA_8231: - /* FIXME: add new ones for 8233/5 */ - r->name = "VIA"; - r->get = pirq_via_get; - r->set = pirq_via_set; - return 1; - } - return 0; -} - -static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_VLSI_82C534: - r->name = "VLSI 82C534"; - r->get = pirq_vlsi_get; - r->set = pirq_vlsi_set; - return 1; - } - return 0; -} - - -static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_SERVERWORKS_OSB4: - case PCI_DEVICE_ID_SERVERWORKS_CSB5: - r->name = "ServerWorks"; - r->get = pirq_serverworks_get; - r->set = pirq_serverworks_set; - return 1; - } - return 0; -} - -static __init int 
sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - u8 reg; - u16 devid; - - if (device != PCI_DEVICE_ID_SI_503) - return 0; - - /* - * In case of SiS south bridge, we need to detect the two - * kinds of routing tables we have seen so far (5595 and 96x). - * Since the maintain the same device ID, we need to do poke - * the PCI configuration space to find the router type we are - * dealing with. - */ - - /* - * Factoid: writing bit6 of register 0x40 of the router config space - * will make the SB to show up 0x096x inside the device id. Note, - * we need to restore register 0x40 after the device id poke. - */ - - pci_read_config_byte(router, PIRQ_SIS_DETECT_REGISTER, ®); - pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg | (1 << 6)); - pci_read_config_word(router, PCI_DEVICE_ID, &devid); - pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg); - - if ((devid & 0xfff0) == 0x0960) { - r->name = "SIS96x"; - r->get = pirq_sis96x_get; - r->set = pirq_sis96x_set; - DBG("PCI: Detecting SiS router at %02x:%02x : SiS096x detected\n", - rt->rtr_bus, rt->rtr_devfn); - } else { - r->name = "SIS5595"; - r->get = pirq_sis5595_get; - r->set = pirq_sis5595_set; - DBG("PCI: Detecting SiS router at %02x:%02x : SiS5595 detected\n", - rt->rtr_bus, rt->rtr_devfn); - } - return 1; -} - -static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_CYRIX_5520: - r->name = "NatSemi"; - r->get = pirq_cyrix_get; - r->set = pirq_cyrix_set; - return 1; - } - return 0; -} - -static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_OPTI_82C700: - r->name = "OPTI"; - r->get = pirq_opti_get; - r->set = pirq_opti_set; - return 1; - } - return 0; -} - -static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case 
PCI_DEVICE_ID_ITE_IT8330G_0: - r->name = "ITE"; - r->get = pirq_ite_get; - r->set = pirq_ite_set; - return 1; - } - return 0; -} - -static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_AL_M1533: - r->name = "ALI"; - r->get = pirq_ali_get; - r->set = pirq_ali_set; - return 1; - /* Should add 156x some day */ - } - return 0; -} - -static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_AMD_VIPER_740B: - r->name = "AMD756"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7413: - r->name = "AMD766"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7443: - r->name = "AMD768"; - break; - default: - return 0; - } - r->get = pirq_amd756_get; - r->set = pirq_amd756_set; - return 1; -} - -static __initdata struct irq_router_handler pirq_routers[] = { - { PCI_VENDOR_ID_INTEL, intel_router_probe }, - { PCI_VENDOR_ID_AL, ali_router_probe }, - { PCI_VENDOR_ID_ITE, ite_router_probe }, - { PCI_VENDOR_ID_VIA, via_router_probe }, - { PCI_VENDOR_ID_OPTI, opti_router_probe }, - { PCI_VENDOR_ID_SI, sis_router_probe }, - { PCI_VENDOR_ID_CYRIX, cyrix_router_probe }, - { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, - { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, - { PCI_VENDOR_ID_AMD, amd_router_probe }, - /* Someone with docs needs to add the ATI Radeon IGP */ - { 0, NULL } -}; -static struct irq_router pirq_router; -static struct pci_dev *pirq_router_dev; - -/* - * FIXME: should we have an option to say "generic for - * chipset" ? 
- */ - -static void __init pirq_find_router(struct irq_router *r) -{ - struct irq_routing_table *rt = pirq_table; - struct irq_router_handler *h; - -#ifdef CONFIG_PCI_BIOS - if (!rt->signature) { - printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n"); - r->set = pirq_bios_set; - r->name = "BIOS"; - return; - } -#endif - - /* Default unless a driver reloads it */ - r->name = "default"; - r->get = NULL; - r->set = NULL; - - DBG("PCI: Attempting to find IRQ router for %04x:%04x\n", - rt->rtr_vendor, rt->rtr_device); - - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); - if (!pirq_router_dev) { - DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); - return; - } - - for( h = pirq_routers; h->vendor; h++) { - /* First look for a router match */ - if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) - break; - /* Fall back to a device match */ - if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) - break; - } - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", - pirq_router.name, - pirq_router_dev->vendor, - pirq_router_dev->device, - pirq_router_dev->slot_name); -} - -static struct irq_info *pirq_get_info(struct pci_dev *dev) -{ - struct irq_routing_table *rt = pirq_table; - int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); - struct irq_info *info; - - for (info = rt->slots; entries--; info++) - if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) - return info; - return NULL; -} - -static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs) -{ -} - -static int pcibios_lookup_irq(struct pci_dev *dev, int assign) -{ - u8 pin; - struct irq_info *info; - int i, pirq, newirq; - int irq = 0; - u32 mask; - struct irq_router *r = &pirq_router; - struct pci_dev *dev2; - char *msg = NULL; - - if (!pirq_table) - return 0; - - /* Find IRQ routing entry */ - 
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (!pin) { - DBG(" -> no interrupt pin\n"); - return 0; - } - pin = pin - 1; - - DBG("IRQ for %s:%d", dev->slot_name, pin); - info = pirq_get_info(dev); - if (!info) { - DBG(" -> not found in routing table\n"); - return 0; - } - pirq = info->irq[pin].link; - mask = info->irq[pin].bitmap; - if (!pirq) { - DBG(" -> not routed\n"); - return 0; - } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); - mask &= pcibios_irq_mask; - - /* Work around broken HP Pavilion Notebooks which assign USB to - IRQ 9 even though it is actually wired to IRQ 11 */ - - if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) { - dev->irq = 11; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); - r->set(pirq_router_dev, dev, pirq, 11); - } - - /* - * Find the best IRQ to assign: use the one - * reported by the device if possible. - */ - newirq = dev->irq; - if (!newirq && assign) { - for (i = 0; i < 16; i++) { - if (!(mask & (1 << i))) - continue; - if (pirq_penalty[i] < pirq_penalty[newirq] && - !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) { - free_irq(i, dev); - newirq = i; - } - } - } - DBG(" -> newirq=%d", newirq); - - /* Check if it is hardcoded */ - if ((pirq & 0xf0) == 0xf0) { - irq = pirq & 0xf; - DBG(" -> hardcoded IRQ %d\n", irq); - msg = "Hardcoded"; - } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) { - DBG(" -> got IRQ %d\n", irq); - msg = "Found"; - } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { - DBG(" -> assigning IRQ %d", newirq); - if (r->set(pirq_router_dev, dev, pirq, newirq)) { - eisa_set_level_irq(newirq); - DBG(" ... OK\n"); - msg = "Assigned"; - irq = newirq; - } - } - - if (!irq) { - DBG(" ... 
failed\n"); - if (newirq && mask == (1 << newirq)) { - msg = "Guessed"; - irq = newirq; - } else - return 0; - } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name); - - /* Update IRQ for all devices with the same pirq value */ - pci_for_each_dev(dev2) { - pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); - if (!pin) - continue; - pin--; - info = pirq_get_info(dev2); - if (!info) - continue; - if (info->irq[pin].link == pirq) { - /* We refuse to override the dev->irq information. Give a warning! */ - if (dev2->irq && dev2->irq != irq) { - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - dev2->slot_name, dev2->irq, irq); - continue; - } - dev2->irq = irq; - pirq_penalty[irq]++; - if (dev != dev2) - printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name); - } - } - return 1; -} - -void __init pcibios_irq_init(void) -{ - DBG("PCI: IRQ init\n"); - pirq_table = pirq_find_routing_table(); -#ifdef CONFIG_PCI_BIOS - if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) - pirq_table = pcibios_get_irq_routing_table(); -#endif - if (pirq_table) { - pirq_peer_trick(); - pirq_find_router(&pirq_router); - if (pirq_table->exclusive_irqs) { - int i; - for (i=0; i<16; i++) - if (!(pirq_table->exclusive_irqs & (1 << i))) - pirq_penalty[i] += 100; - } - /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ - if (io_apic_assign_pci_irqs) - pirq_table = NULL; - } -} - -void __init pcibios_fixup_irqs(void) -{ - struct pci_dev *dev; - u8 pin; - - DBG("PCI: IRQ fixup\n"); - pci_for_each_dev(dev) { - /* - * If the BIOS has set an out of range IRQ number, just ignore it. - * Also keep track of which IRQ's are already in use. 
- */ - if (dev->irq >= 16) { - DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq); - dev->irq = 0; - } - /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ - if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) - pirq_penalty[dev->irq] = 0; - pirq_penalty[dev->irq]++; - } - - pci_for_each_dev(dev) { - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); -#ifdef CONFIG_X86_IO_APIC - /* - * Recalculate IRQ numbers if we use the I/O APIC. - */ - if (io_apic_assign_pci_irqs) - { - int irq; - - if (pin) { - pin--; /* interrupt pins are numbered starting from 1 */ - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); - /* - * Busses behind bridges are typically not listed in the MP-table. - * In this case we have to look up the IRQ based on the parent bus, - * parent slot, and pin number. The SMP code detects such bridged - * busses itself so we should get into this branch reliably. - */ - if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; - - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); - if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n", - bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); - } - if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", - dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); - dev->irq = irq; - } - } - } -#endif - /* - * Still no IRQ? Try to lookup one... - */ - if (pin && !dev->irq) - pcibios_lookup_irq(dev, 0); - } -} - -void pcibios_penalize_isa_irq(int irq) -{ - /* - * If any ISAPnP device reports an IRQ in its list of possible - * IRQ's, we try to avoid assigning it to PCI devices. 
- */ - pirq_penalty[irq] += 100; -} - -void pcibios_enable_irq(struct pci_dev *dev) -{ - u8 pin; - extern int interrupt_line_quirk; - - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { - char *msg; - - /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ - if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) - return; - - if (io_apic_assign_pci_irqs) - msg = " Probably buggy MP table."; - else if (pci_probe & PCI_BIOS_IRQ_SCAN) - msg = ""; - else - msg = " Please try using pci=biosirq."; - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin - 1, dev->slot_name, msg); - } - /* VIA bridges use interrupt line for apic/pci steering across - the V-Link */ - else if (interrupt_line_quirk) - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); - -} diff --git a/xen/arch/i386/pci-pc.c b/xen/arch/i386/pci-pc.c deleted file mode 100644 index e32472c826..0000000000 --- a/xen/arch/i386/pci-pc.c +++ /dev/null @@ -1,1538 +0,0 @@ -/* - * Low-Level PCI Support for PC - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/*#include */ -#include -#include -#include - -#include "pci-i386.h" - -extern int numnodes; -#define __KERNEL_CS __HYPERVISOR_CS -#define __KERNEL_DS __HYPERVISOR_DS - -unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; - -int pcibios_last_bus = -1; -struct pci_bus *pci_root_bus = NULL; -struct pci_ops *pci_root_ops = NULL; - -int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; -int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; - -static int pci_using_acpi_prt = 0; - -#ifdef CONFIG_MULTIQUAD -#define BUS2QUAD(global) (mp_bus_id_to_node[global]) -#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) -#define QUADLOCAL2BUS(quad,local) 
(quad_local_to_mp_bus_id[quad][local]) -#else -#define BUS2QUAD(global) (0) -#define BUS2LOCAL(global) (global) -#define QUADLOCAL2BUS(quad,local) (local) -#endif - -/* - * This interrupt-safe spinlock protects all accesses to PCI - * configuration space. - */ -static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED; - - -/* - * Functions for accessing PCI configuration space with type 1 accesses - */ - -#ifdef CONFIG_PCI_DIRECT - -#ifdef CONFIG_MULTIQUAD -#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ - (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) - -static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */ -{ - unsigned long flags; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus)); - - switch (len) { - case 1: - *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus)); - break; - case 2: - *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus)); - break; - case 4: - *value = inl_quad(0xCFC, BUS2QUAD(bus)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */ -{ - unsigned long flags; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus)); - - switch (len) { - case 1: - outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus)); - break; - case 2: - outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus)); - break; - case 4: - outl_quad((u32)value, 0xCFC, BUS2QUAD(bus)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value) -{ - int result; - u32 
data; - - result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - - *value = (u8)data; - - return result; -} - -static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value) -{ - int result; - u32 data; - - result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - - *value = (u16)data; - - return result; -} - -static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value) -{ - if (!value) - return -EINVAL; - - return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value) -{ - return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value) -{ - return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value) -{ - return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static struct pci_ops pci_direct_mq_conf1 = { - pci_conf1_read_mq_config_byte, - pci_conf1_read_mq_config_word, - pci_conf1_read_mq_config_dword, - pci_conf1_write_mq_config_byte, - pci_conf1_write_mq_config_word, - pci_conf1_write_mq_config_dword -}; - -#endif /* !CONFIG_MULTIQUAD */ -#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ - (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) - -static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */ -{ - unsigned long flags; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - 
outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); - - switch (len) { - case 1: - *value = inb(0xCFC + (reg & 3)); - break; - case 2: - *value = inw(0xCFC + (reg & 2)); - break; - case 4: - *value = inl(0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */ -{ - unsigned long flags; - - if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); - - switch (len) { - case 1: - outb((u8)value, 0xCFC + (reg & 3)); - break; - case 2: - outw((u16)value, 0xCFC + (reg & 2)); - break; - case 4: - outl((u32)value, 0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF1_ADDRESS - -static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value) -{ - int result; - u32 data; - - result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - - *value = (u8)data; - - return result; -} - -static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value) -{ - int result; - u32 data; - - result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - - *value = (u16)data; - - return result; -} - -static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value) -{ - return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value) -{ - return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value) -{ - return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
- PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value) -{ - return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static struct pci_ops pci_direct_conf1 = { - pci_conf1_read_config_byte, - pci_conf1_read_config_word, - pci_conf1_read_config_dword, - pci_conf1_write_config_byte, - pci_conf1_write_config_word, - pci_conf1_write_config_dword -}; - - -/* - * Functions for accessing PCI configuration space with type 2 accesses - */ - -#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) - -static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) -{ - unsigned long flags; - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - if (dev & 0x10) - return PCIBIOS_DEVICE_NOT_FOUND; - - spin_lock_irqsave(&pci_config_lock, flags); - - outb((u8)(0xF0 | (fn << 1)), 0xCF8); - outb((u8)bus, 0xCFA); - - switch (len) { - case 1: - *value = inb(PCI_CONF2_ADDRESS(dev, reg)); - break; - case 2: - *value = inw(PCI_CONF2_ADDRESS(dev, reg)); - break; - case 4: - *value = inl(PCI_CONF2_ADDRESS(dev, reg)); - break; - } - - outb (0, 0xCF8); - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) -{ - unsigned long flags; - - if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) - return -EINVAL; - - if (dev & 0x10) - return PCIBIOS_DEVICE_NOT_FOUND; - - spin_lock_irqsave(&pci_config_lock, flags); - - outb((u8)(0xF0 | (fn << 1)), 0xCF8); - outb((u8)bus, 0xCFA); - - switch (len) { - case 1: - outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - case 2: - outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - case 4: - outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - } - - outb (0, 0xCF8); - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef 
PCI_CONF2_ADDRESS - -static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value) -{ - int result; - u32 data; - result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - *value = (u8)data; - return result; -} - -static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value) -{ - int result; - u32 data; - result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - *value = (u16)data; - return result; -} - -static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value) -{ - return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value) -{ - return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value) -{ - return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value) -{ - return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static struct pci_ops pci_direct_conf2 = { - pci_conf2_read_config_byte, - pci_conf2_read_config_word, - pci_conf2_read_config_dword, - pci_conf2_write_config_byte, - pci_conf2_write_config_word, - pci_conf2_write_config_dword -}; - - -/* - * Before we decide to use direct hardware access mechanisms, we try to do some - * trivial checks to ensure it at least _seems_ to be working -- we just test - * whether bus 00 contains a host bridge (this is similar to checking - * techniques used in XFree86, but ours should be more reliable since we - * attempt to make use of direct access hints provided by the PCI BIOS). 
- * - * This should be close to trivial, but it isn't, because there are buggy - * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. - */ -static int __devinit pci_sanity_check(struct pci_ops *o) -{ - u16 x; - struct pci_bus bus; /* Fake bus and device */ - struct pci_dev dev; - - if (pci_probe & PCI_NO_CHECKS) - return 1; - bus.number = 0; - dev.bus = &bus; - for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++) - if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) && - (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) || - (!o->read_word(&dev, PCI_VENDOR_ID, &x) && - (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ))) - return 1; - DBG("PCI: Sanity check failed\n"); - return 0; -} - -static struct pci_ops * __devinit pci_check_direct(void) -{ - unsigned int tmp; - unsigned long flags; - - __save_flags(flags); __cli(); - - /* - * Check if configuration type 1 works. - */ - if (pci_probe & PCI_PROBE_CONF1) { - outb (0x01, 0xCFB); - tmp = inl (0xCF8); - outl (0x80000000, 0xCF8); - if (inl (0xCF8) == 0x80000000 && - pci_sanity_check(&pci_direct_conf1)) { - outl (tmp, 0xCF8); - __restore_flags(flags); - printk(KERN_INFO "PCI: Using configuration type 1\n"); - request_region(0xCF8, 8, "PCI conf1"); - -#ifdef CONFIG_MULTIQUAD - /* Multi-Quad has an extended PCI Conf1 */ - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) - return &pci_direct_mq_conf1; -#endif - return &pci_direct_conf1; - } - outl (tmp, 0xCF8); - } - - /* - * Check if configuration type 2 works. - */ - if (pci_probe & PCI_PROBE_CONF2) { - outb (0x00, 0xCFB); - outb (0x00, 0xCF8); - outb (0x00, 0xCFA); - if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - __restore_flags(flags); - printk(KERN_INFO "PCI: Using configuration type 2\n"); - request_region(0xCF8, 4, "PCI conf2"); - return &pci_direct_conf2; - } - } - - __restore_flags(flags); - return NULL; -} - -#endif - -/* - * BIOS32 and PCI BIOS handling. 
- */ - -#ifdef CONFIG_PCI_BIOS - -#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX -#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 -#define PCIBIOS_FIND_PCI_DEVICE 0xb102 -#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 -#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 -#define PCIBIOS_READ_CONFIG_BYTE 0xb108 -#define PCIBIOS_READ_CONFIG_WORD 0xb109 -#define PCIBIOS_READ_CONFIG_DWORD 0xb10a -#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b -#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c -#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d -#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e -#define PCIBIOS_SET_PCI_HW_INT 0xb10f - -/* BIOS32 signature: "_32_" */ -#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) - -/* PCI signature: "PCI " */ -#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) - -/* PCI service signature: "$PCI" */ -#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) - -/* PCI BIOS hardware mechanism flags */ -#define PCIBIOS_HW_TYPE1 0x01 -#define PCIBIOS_HW_TYPE2 0x02 -#define PCIBIOS_HW_TYPE1_SPEC 0x10 -#define PCIBIOS_HW_TYPE2_SPEC 0x20 - -/* - * This is the standard structure used to identify the entry point - * to the BIOS32 Service Directory, as documented in - * Standard BIOS 32-bit Service Directory Proposal - * Revision 0.4 May 24, 1993 - * Phoenix Technologies Ltd. - * Norwood, MA - * and the PCI BIOS specification. - */ - -union bios32 { - struct { - unsigned long signature; /* _32_ */ - unsigned long entry; /* 32 bit physical address */ - unsigned char revision; /* Revision level, 0 */ - unsigned char length; /* Length in paragraphs should be 01 */ - unsigned char checksum; /* All bytes must add up to zero */ - unsigned char reserved[5]; /* Must be zero */ - } fields; - char chars[16]; -}; - -/* - * Physical address of the service directory. 
I don't know if we're - * allowed to have more than one of these or not, so just in case - * we'll make pcibios_present() take a memory start parameter and store - * the array there. - */ - -static struct { - unsigned long address; - unsigned short segment; -} bios32_indirect = { 0, __KERNEL_CS }; - -/* - * Returns the entry point for the given service, NULL on error - */ - -static unsigned long bios32_service(unsigned long service) -{ - unsigned char return_code; /* %al */ - unsigned long address; /* %ebx */ - unsigned long length; /* %ecx */ - unsigned long entry; /* %edx */ - unsigned long flags; - - __save_flags(flags); __cli(); - __asm__("lcall *(%%edi); cld" - : "=a" (return_code), - "=b" (address), - "=c" (length), - "=d" (entry) - : "0" (service), - "1" (0), - "D" (&bios32_indirect)); - __restore_flags(flags); - - switch (return_code) { - case 0: - return address + entry; - case 0x80: /* Not present */ - printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); - return 0; - default: /* Shouldn't happen */ - printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", - service, return_code); - return 0; - } -} - -static struct { - unsigned long address; - unsigned short segment; -} pci_indirect = { 0, __KERNEL_CS }; - -static int pci_bios_present; - -static int __devinit check_pcibios(void) -{ - u32 signature, eax, ebx, ecx; - u8 status, major_ver, minor_ver, hw_mech; - unsigned long flags, pcibios_entry; - - if ((pcibios_entry = bios32_service(PCI_SERVICE))) { - pci_indirect.address = pcibios_entry + PAGE_OFFSET; - - __save_flags(flags); __cli(); - __asm__( - "lcall *(%%edi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=d" (signature), - "=a" (eax), - "=b" (ebx), - "=c" (ecx) - : "1" (PCIBIOS_PCI_BIOS_PRESENT), - "D" (&pci_indirect) - : "memory"); - __restore_flags(flags); - - status = (eax >> 8) & 0xff; - hw_mech = eax & 0xff; - major_ver = (ebx >> 8) & 0xff; - minor_ver = ebx & 0xff; - if (pcibios_last_bus < 0) - 
pcibios_last_bus = ecx & 0xff; - DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n", - status, hw_mech, major_ver, minor_ver, pcibios_last_bus); - if (status || signature != PCI_SIGNATURE) { - printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n", - status, signature); - return 0; - } - printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n", - major_ver, minor_ver, pcibios_entry, pcibios_last_bus); -#ifdef CONFIG_PCI_DIRECT - if (!(hw_mech & PCIBIOS_HW_TYPE1)) - pci_probe &= ~PCI_PROBE_CONF1; - if (!(hw_mech & PCIBIOS_HW_TYPE2)) - pci_probe &= ~PCI_PROBE_CONF2; -#endif - return 1; - } - return 0; -} - -static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id, - unsigned short index, unsigned char *bus, unsigned char *device_fn) -{ - unsigned short bx; - unsigned short ret; - - __asm__("lcall *(%%edi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=b" (bx), - "=a" (ret) - : "1" (PCIBIOS_FIND_PCI_DEVICE), - "c" (device_id), - "d" (vendor), - "S" ((int) index), - "D" (&pci_indirect)); - *bus = (bx >> 8) & 0xff; - *device_fn = bx & 0xff; - return (int) (ret & 0xff00) >> 8; -} - -static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) -{ - unsigned long result = 0; - unsigned long flags; - unsigned long bx = ((bus << 8) | (dev << 3) | fn); - - if (bus > 255 || dev > 31 || fn > 7 || reg > 255) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - switch (len) { - case 1: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=c" (*value), - "=a" (result) - : "1" (PCIBIOS_READ_CONFIG_BYTE), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 2: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=c" (*value), - "=a" (result) - : "1" (PCIBIOS_READ_CONFIG_WORD), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 4: - __asm__("lcall *(%%esi); 
cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=c" (*value), - "=a" (result) - : "1" (PCIBIOS_READ_CONFIG_DWORD), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return (int)((result & 0xff00) >> 8); -} - -static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) -{ - unsigned long result = 0; - unsigned long flags; - unsigned long bx = ((bus << 8) | (dev << 3) | fn); - - if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - switch (len) { - case 1: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (result) - : "0" (PCIBIOS_WRITE_CONFIG_BYTE), - "c" (value), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 2: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (result) - : "0" (PCIBIOS_WRITE_CONFIG_WORD), - "c" (value), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 4: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (result) - : "0" (PCIBIOS_WRITE_CONFIG_DWORD), - "c" (value), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return (int)((result & 0xff00) >> 8); -} - -static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value) -{ - int result; - u32 data; - - if (!value) - BUG(); - - result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, &data); - - *value = (u8)data; - - return result; -} - -static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value) -{ - int result; - u32 data; - - if (!value) - BUG(); - - result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, &data); - - *value = (u16)data; - - return result; -} - -static 
int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value) -{ - if (!value) - BUG(); - - return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - -static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value) -{ - return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 1, value); -} - -static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value) -{ - return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 2, value); -} - -static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value) -{ - return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), where, 4, value); -} - - -/* - * Function table for BIOS32 access - */ - -static struct pci_ops pci_bios_access = { - pci_bios_read_config_byte, - pci_bios_read_config_word, - pci_bios_read_config_dword, - pci_bios_write_config_byte, - pci_bios_write_config_word, - pci_bios_write_config_dword -}; - -/* - * Try to find PCI BIOS. - */ - -static struct pci_ops * __devinit pci_find_bios(void) -{ - union bios32 *check; - unsigned char sum; - int i, length; - - /* - * Follow the standard procedure for locating the BIOS32 Service - * directory by scanning the permissible address range from - * 0xe0000 through 0xfffff for a valid BIOS32 structure. 
- */ - - for (check = (union bios32 *) __va(0xe0000); - check <= (union bios32 *) __va(0xffff0); - ++check) { - if (check->fields.signature != BIOS32_SIGNATURE) - continue; - length = check->fields.length * 16; - if (!length) - continue; - sum = 0; - for (i = 0; i < length ; ++i) - sum += check->chars[i]; - if (sum != 0) - continue; - if (check->fields.revision != 0) { - printk("PCI: unsupported BIOS32 revision %d at 0x%p\n", - check->fields.revision, check); - continue; - } - DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); - if (check->fields.entry >= 0x100000) { - printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check); - return NULL; - } else { - unsigned long bios32_entry = check->fields.entry; - DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); - bios32_indirect.address = bios32_entry + PAGE_OFFSET; - if (check_pcibios()) - return &pci_bios_access; - } - break; /* Hopefully more than one BIOS32 cannot happen... */ - } - - return NULL; -} - -/* - * Sort the device list according to PCI BIOS. Nasty hack, but since some - * fool forgot to define the `correct' device order in the PCI BIOS specs - * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels - * which used BIOS ordering, we are bound to do this... 
- */ - -static void __devinit pcibios_sort(void) -{ - LIST_HEAD(sorted_devices); - struct list_head *ln; - struct pci_dev *dev, *d; - int idx, found; - unsigned char bus, devfn; - - DBG("PCI: Sorting device list...\n"); - while (!list_empty(&pci_devices)) { - ln = pci_devices.next; - dev = pci_dev_g(ln); - idx = found = 0; - while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) { - idx++; - for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) { - d = pci_dev_g(ln); - if (d->bus->number == bus && d->devfn == devfn) { - list_del(&d->global_list); - list_add_tail(&d->global_list, &sorted_devices); - if (d == dev) - found = 1; - break; - } - } - if (ln == &pci_devices) { - printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn); - /* - * We must not continue scanning as several buggy BIOSes - * return garbage after the last device. Grr. - */ - break; - } - } - if (!found) { - printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n", - dev->bus->number, dev->devfn); - list_del(&dev->global_list); - list_add_tail(&dev->global_list, &sorted_devices); - } - } - list_splice(&sorted_devices, &pci_devices); -} - -/* - * BIOS Functions for IRQ Routing - */ - -struct irq_routing_options { - u16 size; - struct irq_info *table; - u16 segment; -} __attribute__((packed)); - -struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) -{ - struct irq_routing_options opt; - struct irq_routing_table *rt = NULL; - int ret, map; - unsigned long page; - - if (!pci_bios_present) - return NULL; - page = __get_free_page(GFP_KERNEL); - if (!page) - return NULL; - opt.table = (struct irq_info *) page; - opt.size = PAGE_SIZE; - opt.segment = __KERNEL_DS; - - DBG("PCI: Fetching IRQ routing table... 
"); - __asm__("push %%es\n\t" - "push %%ds\n\t" - "pop %%es\n\t" - "lcall *(%%esi); cld\n\t" - "pop %%es\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (ret), - "=b" (map), - "+m" (opt) - : "0" (PCIBIOS_GET_ROUTING_OPTIONS), - "1" (0), - "D" ((long) &opt), - "S" (&pci_indirect)); - DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); - if (ret & 0xff00) - printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); - else if (opt.size) { - rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL); - if (rt) { - memset(rt, 0, sizeof(struct irq_routing_table)); - rt->size = opt.size + sizeof(struct irq_routing_table); - rt->exclusive_irqs = map; - memcpy(rt->slots, (void *) page, opt.size); - printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n"); - } - } - free_page(page); - return rt; -} - - -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) -{ - int ret; - - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (ret) - : "0" (PCIBIOS_SET_PCI_HW_INT), - "b" ((dev->bus->number << 8) | dev->devfn), - "c" ((irq << 8) | (pin + 10)), - "S" (&pci_indirect)); - return !(ret & 0xff00); -} - -#endif - -/* - * Several buggy motherboards address only 16 devices and mirror - * them to next 16 IDs. We try to detect this `feature' on all - * primary buses (those containing host bridges as they are - * expected to be unique) and remove the ghost devices. 
- */ - -static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) -{ - struct list_head *ln, *mn; - struct pci_dev *d, *e; - int mirror = PCI_DEVFN(16,0); - int seen_host_bridge = 0; - int i; - - DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); - for (ln=b->devices.next; ln != &b->devices; ln=ln->next) { - d = pci_dev_b(ln); - if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) - seen_host_bridge++; - for (mn=ln->next; mn != &b->devices; mn=mn->next) { - e = pci_dev_b(mn); - if (e->devfn != d->devfn + mirror || - e->vendor != d->vendor || - e->device != d->device || - e->class != d->class) - continue; - for(i=0; iresource[i].start != d->resource[i].start || - e->resource[i].end != d->resource[i].end || - e->resource[i].flags != d->resource[i].flags) - continue; - break; - } - if (mn == &b->devices) - return; - } - if (!seen_host_bridge) - return; - printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); - - ln = &b->devices; - while (ln->next != &b->devices) { - d = pci_dev_b(ln->next); - if (d->devfn >= mirror) { - list_del(&d->global_list); - list_del(&d->bus_list); - kfree(d); - } else - ln = ln->next; - } -} - -/* - * Discover remaining PCI buses in case there are peer host bridges. - * We use the number of last PCI bus provided by the PCI BIOS. 
- */ -static void __devinit pcibios_fixup_peer_bridges(void) -{ - int n; - struct pci_bus bus; - struct pci_dev dev; - u16 l; - - if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) - return; - DBG("PCI: Peer bridge fixup\n"); - for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) - continue; - bus.number = n; - bus.ops = pci_root_ops; - dev.bus = &bus; - for(dev.devfn=0; dev.devfn<256; dev.devfn += 8) - if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) && - l != 0x0000 && l != 0xffff) { - DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus(n, pci_root_ops, NULL); - break; - } - } -} - -/* - * Exceptions for specific devices. Usually work-arounds for fatal design flaws. - */ - -static void __devinit pci_fixup_i450nx(struct pci_dev *d) -{ - /* - * i450NX -- Find and scan all secondary buses on all PXB's. - */ - int pxb, reg; - u8 busno, suba, subb; -#ifdef CONFIG_MULTIQUAD - int quad = BUS2QUAD(d->bus->number); -#endif - printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name); - reg = 0xd0; - for(pxb=0; pxb<2; pxb++) { - pci_read_config_byte(d, reg++, &busno); - pci_read_config_byte(d, reg++, &suba); - pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); - if (busno) - pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */ - if (suba < subb) - pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */ - } - pcibios_last_bus = -1; -} - -static void __devinit pci_fixup_i450gx(struct pci_dev *d) -{ - /* - * i450GX and i450KX -- Find and scan all secondary buses. 
- * (called separately for each PCI bridge found) - */ - u8 busno; - pci_read_config_byte(d, 0x4a, &busno); - printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno); - pci_scan_bus(busno, pci_root_ops, NULL); - pcibios_last_bus = -1; -} - -static void __devinit pci_fixup_umc_ide(struct pci_dev *d) -{ - /* - * UM8886BF IDE controller sets region type bits incorrectly, - * therefore they look like memory despite of them being I/O. - */ - int i; - - printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name); - for(i=0; i<4; i++) - d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO; -} - -static void __devinit pci_fixup_ncr53c810(struct pci_dev *d) -{ - /* - * NCR 53C810 returns class code 0 (at least on some systems). - * Fix class to be PCI_CLASS_STORAGE_SCSI - */ - if (!d->class) { - printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name); - d->class = PCI_CLASS_STORAGE_SCSI << 8; - } -} - -static void __devinit pci_fixup_ide_bases(struct pci_dev *d) -{ - int i; - - /* - * PCI IDE controllers use non-standard I/O port decoding, respect it. - */ - if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE) - return; - DBG("PCI: IDE base address fixup for %s\n", d->slot_name); - for(i=0; i<4; i++) { - struct resource *r = &d->resource[i]; - if ((r->start & ~0x80) == 0x374) { - r->start |= 2; - r->end = r->start; - } - } -} - -static void __devinit pci_fixup_ide_trash(struct pci_dev *d) -{ - int i; - - /* - * There exist PCI IDE controllers which have utter garbage - * in first four base registers. Ignore that. - */ - DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name); - for(i=0; i<4; i++) - d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0; -} - -static void __devinit pci_fixup_latency(struct pci_dev *d) -{ - /* - * SiS 5597 and 5598 chipsets require latency timer set to - * at most 32 to avoid lockups. 
- */ - DBG("PCI: Setting max latency to 32\n"); - pcibios_max_latency = 32; -} - -static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) -{ - /* - * PIIX4 ACPI device: hardwired IRQ9 - */ - d->irq = 9; -} - -/* - * Addresses issues with problems in the memory write queue timer in - * certain VIA Northbridges. This bugfix is per VIA's specifications, - * except for the KL133/KM133: clearing bit 5 on those Northbridges seems - * to trigger a bug in its integrated ProSavage video card, which - * causes screen corruption. We only clear bits 6 and 7 for that chipset, - * until VIA can provide us with definitive information on why screen - * corruption occurs, and what exactly those bits do. - * - * VIA 8363,8622,8361 Northbridges: - * - bits 5, 6, 7 at offset 0x55 need to be turned off - * VIA 8367 (KT266x) Northbridges: - * - bits 5, 6, 7 at offset 0x95 need to be turned off - * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges: - * - bits 6, 7 at offset 0x55 need to be turned off - */ - -#define VIA_8363_KL133_REVISION_ID 0x81 -#define VIA_8363_KM133_REVISION_ID 0x84 - -static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d) -{ - u8 v; - u8 revision; - int where = 0x55; - int mask = 0x1f; /* clear bits 5, 6, 7 by default */ - - pci_read_config_byte(d, PCI_REVISION_ID, &revision); - - if (d->device == PCI_DEVICE_ID_VIA_8367_0) { - /* fix pci bus latency issues resulted by NB bios error - it appears on bug free^Wreduced kt266x's bios forces - NB latency to zero */ - pci_write_config_byte(d, PCI_LATENCY_TIMER, 0); - - where = 0x95; /* the memory write queue timer register is - different for the KT266x's: 0x95 not 0x55 */ - } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 && - (revision == VIA_8363_KL133_REVISION_ID || - revision == VIA_8363_KM133_REVISION_ID)) { - mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5 - causes screen corruption on the KL133/KM133 */ - } - - pci_read_config_byte(d, where, &v); - if (v & ~mask) { - printk("Disabling VIA 
memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \ - d->device, revision, where, v, mask, v & mask); - v &= mask; - pci_write_config_byte(d, where, v); - } -} - -/* - * For some reasons Intel decided that certain parts of their - * 815, 845 and some other chipsets must look like PCI-to-PCI bridges - * while they are obviously not. The 82801 family (AA, AB, BAM/CAM, - * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according - * to Intel terminology. These devices do forward all addresses from - * system to PCI bus no matter what are their window settings, so they are - * "transparent" (or subtractive decoding) from programmers point of view. - */ -static void __init pci_fixup_transparent_bridge(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && - (dev->device & 0xff00) == 0x2400) - dev->transparent = 1; -} - -struct pci_fixup pcibios_fixups[] = { - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash }, - { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, 
pci_fixup_via_northbridge_bug }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge }, - { 0 } -}; - -/* - * Called after each bus is probed, but before its children - * are examined. - */ - -void __devinit pcibios_fixup_bus(struct pci_bus *b) -{ - pcibios_fixup_ghosts(b); - pci_read_bridge_bases(b); -} - -struct pci_bus * __devinit pcibios_scan_root(int busnum) -{ - struct list_head *list; - struct pci_bus *bus; - - list_for_each(list, &pci_root_buses) { - bus = pci_bus_b(list); - if (bus->number == busnum) { - /* Already scanned */ - return bus; - } - } - - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); - - return pci_scan_bus(busnum, pci_root_ops, NULL); -} - -void __devinit pcibios_config_init(void) -{ - /* - * Try all known PCI access methods. Note that we support using - * both PCI BIOS and direct access, with a preference for direct. - */ - -#ifdef CONFIG_PCI_DIRECT - struct pci_ops *tmp = NULL; -#endif - - -#ifdef CONFIG_PCI_BIOS - if ((pci_probe & PCI_PROBE_BIOS) - && ((pci_root_ops = pci_find_bios()))) { - pci_probe |= PCI_BIOS_SORT; - pci_bios_present = 1; - pci_config_read = pci_bios_read; - pci_config_write = pci_bios_write; - } -#endif - -#ifdef CONFIG_PCI_DIRECT - if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2)) - && (tmp = pci_check_direct())) { - pci_root_ops = tmp; - if (pci_root_ops == &pci_direct_conf1) { - pci_config_read = pci_conf1_read; - pci_config_write = pci_conf1_write; - } - else { - pci_config_read = pci_conf2_read; - pci_config_write = pci_conf2_write; - } - } -#endif - - return; -} - -void __init pcibios_init(void) -{ - int quad; - - if (!pci_root_ops) - pcibios_config_init(); - if (!pci_root_ops) { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return; - } - - pcibios_set_cacheline_size(); - - printk(KERN_INFO "PCI: Probing PCI hardware\n"); -#ifdef CONFIG_ACPI_PCI - if 
(!acpi_noirq && !acpi_pci_irq_init()) { - pci_using_acpi_prt = 1; - printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); - printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi' or even 'acpi=off'\n"); - } -#endif - if (!pci_using_acpi_prt) { - pci_root_bus = pcibios_scan_root(0); - pcibios_irq_init(); - pcibios_fixup_peer_bridges(); - pcibios_fixup_irqs(); - } - if (clustered_apic_mode && (numnodes > 1)) { - for (quad = 1; quad < numnodes; ++quad) { - printk("Scanning PCI bus %d for quad %d\n", - QUADLOCAL2BUS(quad,0), quad); - pci_scan_bus(QUADLOCAL2BUS(quad,0), - pci_root_ops, NULL); - } - } - - pcibios_resource_survey(); - -#ifdef CONFIG_PCI_BIOS - if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) - pcibios_sort(); -#endif -} - -char * __devinit pcibios_setup(char *str) -{ - if (!strcmp(str, "off")) { - pci_probe = 0; - return NULL; - } -#ifdef CONFIG_PCI_BIOS - else if (!strcmp(str, "bios")) { - pci_probe = PCI_PROBE_BIOS; - return NULL; - } else if (!strcmp(str, "nobios")) { - pci_probe &= ~PCI_PROBE_BIOS; - return NULL; - } else if (!strcmp(str, "nosort")) { - pci_probe |= PCI_NO_SORT; - return NULL; - } else if (!strcmp(str, "biosirq")) { - pci_probe |= PCI_BIOS_IRQ_SCAN; - return NULL; - } -#endif -#ifdef CONFIG_PCI_DIRECT - else if (!strcmp(str, "conf1")) { - pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; - return NULL; - } - else if (!strcmp(str, "conf2")) { - pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; - return NULL; - } -#endif - else if (!strcmp(str, "rom")) { - pci_probe |= PCI_ASSIGN_ROMS; - return NULL; - } else if (!strcmp(str, "assign-busses")) { - pci_probe |= PCI_ASSIGN_ALL_BUSSES; - return NULL; - } else if (!strncmp(str, "irqmask=", 8)) { - pcibios_irq_mask = simple_strtol(str+8, NULL, 0); - return NULL; - } else if (!strncmp(str, "lastbus=", 8)) { - pcibios_last_bus = simple_strtol(str+8, NULL, 0); - return NULL; - } else if (!strncmp(str, "noacpi", 6)) { - acpi_noirq_set(); - return NULL; - } - return 
str; -} - -unsigned int pcibios_assign_all_busses(void) -{ - return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - int err; - - if ((err = pcibios_enable_resources(dev, mask)) < 0) - return err; - -#ifdef CONFIG_ACPI_PCI - if (pci_using_acpi_prt) { - acpi_pci_irq_enable(dev); - return 0; - } -#endif - - pcibios_enable_irq(dev); - - return 0; -} diff --git a/xen/arch/i386/pdb-linux.c b/xen/arch/i386/pdb-linux.c deleted file mode 100644 index fd0fc5ed78..0000000000 --- a/xen/arch/i386/pdb-linux.c +++ /dev/null @@ -1,100 +0,0 @@ - -/* - * pervasive debugger - * www.cl.cam.ac.uk/netos/pdb - * - * alex ho - * 2004 - * university of cambridge computer laboratory - * - * linux & i386 dependent code. bleech. - */ - -#include - -/* offset to the first instruction in the linux system call code - where we can safely set a breakpoint */ -unsigned int pdb_linux_syscall_enter_bkpt_offset = 20; - -/* offset to eflags saved on the stack after an int 80 */ -unsigned int pdb_linux_syscall_eflags_offset = 48; - -/* offset to the instruction pointer saved on the stack after an int 80 */ -unsigned int pdb_linux_syscall_eip_offset = 40; - -unsigned char -pdb_linux_set_bkpt (unsigned long addr) -{ - unsigned char old_instruction = *(unsigned char *)addr; - *(unsigned char *)addr = 0xcc; - return old_instruction; -} - -void -pdb_linux_clr_bkpt (unsigned long addr, unsigned char value) -{ - *(unsigned char *)addr = value; -} - -void -pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code, - trap_info_t *ti) -{ - /* set at breakpoint at the beginning of the - system call in the target domain */ - - pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address + - pdb_linux_syscall_enter_bkpt_offset); - pdb_system_call = 1; -} - -void -pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, struct pdb_context *pdb_ctx) -{ - /* - we've hit an int 0x80 in a user's program, jumped into xen - 
(traps.c::do_general_protection()) which re-wrote the next - instruction in the os kernel to 0xcc, and then hit that - exception. - - we need to re-write the return instruction in the user's - program so that we know when we have finished the system call - and are back in the user's program. - - at this point our stack should look something like this: - - esp = 0x80a59f0 - esp + 4 = 0x0 - esp + 8 = 0x80485a0 - esp + 12 = 0x2d - esp + 16 = 0x80485f4 - esp + 20 = 0xbffffa48 - esp + 24 = 0xd - esp + 28 = 0xc00a0833 - esp + 32 = 0x833 - esp + 36 = 0xd - esp + 40 = 0x804dcdd saved eip - esp + 44 = 0x82b saved cs - esp + 48 = 0x213392 saved eflags - esp + 52 = 0xbffffa2c saved esp - esp + 56 = 0x833 saved ss - esp + 60 = 0x1000000 - */ - - /* restore the entry instruction for the system call */ - pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr); - - /* save the address of eflags that was saved on the stack */ - pdb_system_call_eflags_addr = (regs->esp + - pdb_linux_syscall_eflags_offset); - - /* muck with the return instruction so that we trap back into the - debugger when re-entering user space */ - pdb_system_call_next_addr = *(unsigned long *)(regs->esp + - pdb_linux_syscall_eip_offset); - pdb_linux_get_values (&pdb_system_call_leave_instr, 1, - pdb_system_call_next_addr, - pdb_ctx->process, pdb_ctx->ptbr); - pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr, - pdb_ctx->process, pdb_ctx->ptbr); -} diff --git a/xen/arch/i386/pdb-stub.c b/xen/arch/i386/pdb-stub.c deleted file mode 100644 index 815d3a29df..0000000000 --- a/xen/arch/i386/pdb-stub.c +++ /dev/null @@ -1,1335 +0,0 @@ - -/* - * pervasive debugger - * www.cl.cam.ac.uk/netos/pdb - * - * alex ho - * 2004 - * university of cambridge computer laboratory - * - * code adapted originally from kgdb, nemesis, & gdbserver - */ - -#include -#include -#include -#include -#include -#include /* [un]map_domain_mem */ -#include -#include -#include -#include - -#undef PDB_DEBUG_TRACE -#ifdef 
PDB_DEBUG_TRACE -#define TRC(_x) _x -#else -#define TRC(_x) -#endif - -#define DEBUG_EXCEPTION 0x01 -#define BREAKPT_EXCEPTION 0x03 -#define PDB_LIVE_EXCEPTION 0x58 -#define KEYPRESS_EXCEPTION 0x88 - -#define BUFMAX 400 - -static const char hexchars[] = "0123456789abcdef"; - -static int remote_debug; - -#define PDB_BUFMAX 1024 -static char pdb_in_buffer[PDB_BUFMAX]; -static char pdb_out_buffer[PDB_BUFMAX]; -static char pdb_buffer[PDB_BUFMAX]; -static int pdb_in_buffer_ptr; -static unsigned char pdb_in_checksum; -static unsigned char pdb_xmit_checksum; - -struct pdb_context pdb_ctx; -int pdb_continue_thread = 0; -int pdb_general_thread = 0; - -void pdb_put_packet (unsigned char *buffer, int ack); -void pdb_bkpt_check (u_char *buffer, int length, - unsigned long cr3, unsigned long addr); - -int pdb_initialized = 0; -int pdb_page_fault_possible = 0; -int pdb_page_fault_scratch = 0; /* just a handy variable */ -int pdb_page_fault = 0; -static int pdb_serhnd = -1; -static int pdb_stepping = 0; - -int pdb_system_call = 0; -unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */ -unsigned char pdb_system_call_leave_instr = 0; /* original next instr */ -unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */ -unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */ - -static inline void pdb_put_char(unsigned char c) -{ - serial_putc(pdb_serhnd, c); -} - -static inline unsigned char pdb_get_char(void) -{ - return serial_getc(pdb_serhnd); -} - -int -get_char (char *addr) -{ - return *addr; -} - -void -set_char (char *addr, int val) -{ - *addr = val; -} - -void -pdb_process_query (char *ptr) -{ - if (strcmp(ptr, "C") == 0) - { - /* empty string */ - } - else if (strcmp(ptr, "fThreadInfo") == 0) - { -#ifdef PDB_PAST - struct task_struct *p; - u_long flags; -#endif /* PDB_PAST */ - - int buf_idx = 0; - - pdb_out_buffer[buf_idx++] = 'l'; - pdb_out_buffer[buf_idx++] = 0; - -#ifdef PDB_PAST - switch (pdb_level) - { - case 
PDB_LVL_XEN: /* return a list of domains */ - { - int count = 0; - - read_lock_irqsave (&tasklist_lock, flags); - - pdb_out_buffer[buf_idx++] = 'm'; - for_each_domain ( p ) - { - domid_t domain = p->domain + PDB_ID_OFFSET; - - if (count > 0) - { - pdb_out_buffer[buf_idx++] = ','; - } - if (domain > 15) - { - pdb_out_buffer[buf_idx++] = hexchars[domain >> 4]; - } - pdb_out_buffer[buf_idx++] = hexchars[domain % 16]; - count++; - } - pdb_out_buffer[buf_idx++] = 0; - - read_unlock_irqrestore(&tasklist_lock, flags); - break; - } - case PDB_LVL_GUESTOS: /* return a list of processes */ - { - int foobar[20]; - int loop, total; - - /* this cr3 is wrong! */ - total = pdb_linux_process_list(pdb_ctx[pdb_level].info_cr3, - foobar, 20); - - pdb_out_buffer[buf_idx++] = 'm'; - pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */ - for (loop = 0; loop < total; loop++) - { - int pid = foobar[loop] + PDB_ID_OFFSET; - - pdb_out_buffer[buf_idx++] = ','; - if (pid > 15) - { - pdb_out_buffer[buf_idx++] = hexchars[pid >> 4]; - } - pdb_out_buffer[buf_idx++] = hexchars[pid % 16]; - } - pdb_out_buffer[buf_idx++] = 0; - break; - } - case PDB_LVL_PROCESS: /* hmmm... 
*/ - { - pdb_out_buffer[buf_idx++] = 'm'; - pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */ - break; - } - default: - break; - } -#endif /* PDB_PAST */ - - } - else if (strcmp(ptr, "sThreadInfo") == 0) - { - int buf_idx = 0; - - pdb_out_buffer[buf_idx++] = 'l'; - pdb_out_buffer[buf_idx++] = 0; - } - else if (strncmp(ptr, "ThreadExtraInfo,", 16) == 0) - { - int thread = 0; - char *message = "foobar ?"; - - ptr += 16; - if (hexToInt (&ptr, &thread)) - { - mem2hex (message, pdb_out_buffer, strlen(message) + 1); - } - -#ifdef PDB_PAST - int thread = 0; - char message[16]; - struct task_struct *p; - - p = find_domain_by_id(pdb_ctx[pdb_level].info); - strncpy (message, p->name, 16); - put_task_struct(p); - - ptr += 16; - if (hexToInt (&ptr, &thread)) - { - mem2hex ((char *)message, pdb_out_buffer, strlen(message) + 1); - } -#endif /* PDB_PAST */ - -#ifdef PDB_FUTURE - { - char string[task_struct_comm_length]; - - string[0] = 0; - pdb_linux_process_details (cr3, pid, string); - printk (" (%s)", string); - } -#endif /* PDB_FUTURE*/ - - } - else if (strcmp(ptr, "Offsets") == 0) - { - /* empty string */ - } - else if (strncmp(ptr, "Symbol", 6) == 0) - { - strcpy (pdb_out_buffer, "OK"); - } - else - { - printk("pdb: error, unknown query [%s]\n", ptr); - } -} - -void -pdb_x86_to_gdb_regs (char *buffer, struct pt_regs *regs) -{ - int idx = 0; - - mem2hex ((char *)®s->eax, &buffer[idx], sizeof(regs->eax)); - idx += sizeof(regs->eax) * 2; - mem2hex ((char *)®s->ecx, &buffer[idx], sizeof(regs->ecx)); - idx += sizeof(regs->ecx) * 2; - mem2hex ((char *)®s->edx, &buffer[idx], sizeof(regs->edx)); - idx += sizeof(regs->edx) * 2; - mem2hex ((char *)®s->ebx, &buffer[idx], sizeof(regs->ebx)); - idx += sizeof(regs->ebx) * 2; - mem2hex ((char *)®s->esp, &buffer[idx], sizeof(regs->esp)); - idx += sizeof(regs->esp) * 2; - mem2hex ((char *)®s->ebp, &buffer[idx], sizeof(regs->ebp)); - idx += sizeof(regs->ebp) * 2; - mem2hex ((char *)®s->esi, &buffer[idx], sizeof(regs->esi)); - idx += 
sizeof(regs->esi) * 2; - mem2hex ((char *)®s->edi, &buffer[idx], sizeof(regs->edi)); - idx += sizeof(regs->edi) * 2; - mem2hex ((char *)®s->eip, &buffer[idx], sizeof(regs->eip)); - idx += sizeof(regs->eip) * 2; - mem2hex ((char *)®s->eflags, &buffer[idx], sizeof(regs->eflags)); - idx += sizeof(regs->eflags) * 2; - mem2hex ((char *)®s->xcs, &buffer[idx], sizeof(regs->xcs)); - idx += sizeof(regs->xcs) * 2; - mem2hex ((char *)®s->xss, &buffer[idx], sizeof(regs->xss)); - idx += sizeof(regs->xss) * 2; - mem2hex ((char *)®s->xds, &buffer[idx], sizeof(regs->xds)); - idx += sizeof(regs->xds) * 2; - mem2hex ((char *)®s->xes, &buffer[idx], sizeof(regs->xes)); - idx += sizeof(regs->xes) * 2; - mem2hex ((char *)®s->xfs, &buffer[idx], sizeof(regs->xfs)); - idx += sizeof(regs->xfs) * 2; - mem2hex ((char *)®s->xgs, &buffer[idx], sizeof(regs->xgs)); -} - -/* at this point we allow any register to be changed, caveat emptor */ -void -pdb_gdb_to_x86_regs (struct pt_regs *regs, char *buffer) -{ - hex2mem(buffer, (char *)®s->eax, sizeof(regs->eax)); - buffer += sizeof(regs->eax) * 2; - hex2mem(buffer, (char *)®s->ecx, sizeof(regs->ecx)); - buffer += sizeof(regs->ecx) * 2; - hex2mem(buffer, (char *)®s->edx, sizeof(regs->edx)); - buffer += sizeof(regs->edx) * 2; - hex2mem(buffer, (char *)®s->ebx, sizeof(regs->ebx)); - buffer += sizeof(regs->ebx) * 2; - hex2mem(buffer, (char *)®s->esp, sizeof(regs->esp)); - buffer += sizeof(regs->esp) * 2; - hex2mem(buffer, (char *)®s->ebp, sizeof(regs->ebp)); - buffer += sizeof(regs->ebp) * 2; - hex2mem(buffer, (char *)®s->esi, sizeof(regs->esi)); - buffer += sizeof(regs->esi) * 2; - hex2mem(buffer, (char *)®s->edi, sizeof(regs->edi)); - buffer += sizeof(regs->edi) * 2; - hex2mem(buffer, (char *)®s->eip, sizeof(regs->eip)); - buffer += sizeof(regs->eip) * 2; - hex2mem(buffer, (char *)®s->eflags, sizeof(regs->eflags)); - buffer += sizeof(regs->eflags) * 2; - hex2mem(buffer, (char *)®s->xcs, sizeof(regs->xcs)); - buffer += sizeof(regs->xcs) * 2; - 
hex2mem(buffer, (char *)®s->xss, sizeof(regs->xss)); - buffer += sizeof(regs->xss) * 2; - hex2mem(buffer, (char *)®s->xds, sizeof(regs->xds)); - buffer += sizeof(regs->xds) * 2; - hex2mem(buffer, (char *)®s->xes, sizeof(regs->xes)); - buffer += sizeof(regs->xes) * 2; - hex2mem(buffer, (char *)®s->xfs, sizeof(regs->xfs)); - buffer += sizeof(regs->xfs) * 2; - hex2mem(buffer, (char *)®s->xgs, sizeof(regs->xgs)); -} - -int -pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3, - int sigval) -{ - int length; - unsigned long addr; - int ack = 1; /* wait for ack in pdb_put_packet */ - int go = 0; - - TRC(printf("pdb: [%s]\n", ptr)); - - pdb_out_buffer[0] = 0; - - if (pdb_ctx.valid == 1) - { - if (pdb_ctx.domain == -1) /* pdb context: xen */ - { - struct task_struct *p; - - p = &idle0_task; - if (p->mm.shadow_mode) - pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table); - else - pdb_ctx.ptbr = pagetable_val(p->mm.pagetable); - } - else if (pdb_ctx.process == -1) /* pdb context: guest os */ - { - struct task_struct *p; - - if (pdb_ctx.domain == -2) - { - p = find_last_domain(); - } - else - { - p = find_domain_by_id(pdb_ctx.domain); - } - if (p == NULL) - { - printk ("pdb error: unknown domain [0x%x]\n", pdb_ctx.domain); - strcpy (pdb_out_buffer, "E01"); - pdb_ctx.domain = -1; - goto exit; - } - if (p->mm.shadow_mode) - pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table); - else - pdb_ctx.ptbr = pagetable_val(p->mm.pagetable); - put_task_struct(p); - } - else /* pdb context: process */ - { - struct task_struct *p; - unsigned long domain_ptbr; - - p = find_domain_by_id(pdb_ctx.domain); - if (p == NULL) - { - printk ("pdb error: unknown domain [0x%x][0x%x]\n", - pdb_ctx.domain, pdb_ctx.process); - strcpy (pdb_out_buffer, "E01"); - pdb_ctx.domain = -1; - goto exit; - } - if (p->mm.shadow_mode) - domain_ptbr = pagetable_val(p->mm.shadow_table); - else - domain_ptbr = pagetable_val(p->mm.pagetable); - put_task_struct(p); - - pdb_ctx.ptbr = domain_ptbr; - 
/*pdb_ctx.ptbr=pdb_linux_pid_ptbr(domain_ptbr, pdb_ctx.process);*/ - } - - pdb_ctx.valid = 0; - TRC(printk ("pdb change context (dom:%d, proc:%d) now 0x%lx\n", - pdb_ctx.domain, pdb_ctx.process, pdb_ctx.ptbr)); - } - - switch (*ptr++) - { - case '?': - pdb_out_buffer[0] = 'S'; - pdb_out_buffer[1] = hexchars[sigval >> 4]; - pdb_out_buffer[2] = hexchars[sigval % 16]; - pdb_out_buffer[3] = 0; - break; - case 'S': /* step with signal */ - case 's': /* step */ - { - if ( pdb_system_call_eflags_addr != 0 ) - { - unsigned long eflags; - char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */ - - pdb_linux_get_values((u_char*)&eflags, sizeof(eflags), - pdb_system_call_eflags_addr, - pdb_ctx.process, pdb_ctx.ptbr); - eflags |= X86_EFLAGS_TF; - mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags)); - pdb_linux_set_values(eflags_buf, sizeof(eflags), - pdb_system_call_eflags_addr, - pdb_ctx.process, pdb_ctx.ptbr); - } - - regs->eflags |= X86_EFLAGS_TF; - pdb_stepping = 1; - return 1; - /* not reached */ - } - case 'C': /* continue with signal */ - case 'c': /* continue */ - { - if ( pdb_system_call_eflags_addr != 0 ) - { - unsigned long eflags; - char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */ - - pdb_linux_get_values((u_char*)&eflags, sizeof(eflags), - pdb_system_call_eflags_addr, - pdb_ctx.process, pdb_ctx.ptbr); - eflags &= ~X86_EFLAGS_TF; - mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags)); - pdb_linux_set_values(eflags_buf, sizeof(eflags), - pdb_system_call_eflags_addr, - pdb_ctx.process, pdb_ctx.ptbr); - } - - regs->eflags &= ~X86_EFLAGS_TF; - return 1; /* jump out before replying to gdb */ - /* not reached */ - } - case 'd': - remote_debug = !(remote_debug); /* toggle debug flag */ - break; - case 'D': /* detach */ - return go; - /* not reached */ - case 'g': /* return the value of the CPU registers */ - { - pdb_x86_to_gdb_regs (pdb_out_buffer, regs); - break; - } - case 'G': /* set the value of the CPU registers - return OK */ - { - 
pdb_gdb_to_x86_regs (regs, ptr); - break; - } - case 'H': - { - int thread; - char *next = &ptr[1]; - - if (hexToInt (&next, &thread)) - { - if (*ptr == 'c') - { - pdb_continue_thread = thread; - } - else if (*ptr == 'g') - { - pdb_general_thread = thread; - } - else - { - printk ("pdb error: unknown set thread command %c (%d)\n", - *ptr, thread); - strcpy (pdb_out_buffer, "E00"); - break; - } - } - strcpy (pdb_out_buffer, "OK"); - break; - } - case 'k': /* kill request */ - { - strcpy (pdb_out_buffer, "OK"); /* ack for fun */ - printk ("don't kill bill...\n"); - ack = 0; - break; - } - - case 'q': - { - pdb_process_query(ptr); - break; - } - - /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ - case 'm': - { - /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ - if (hexToInt (&ptr, (int *)&addr)) - if (*(ptr++) == ',') - if (hexToInt (&ptr, &length)) - { - ptr = 0; - - pdb_page_fault_possible = 1; - pdb_page_fault = 0; - if (addr >= PAGE_OFFSET) - { - mem2hex ((char *) addr, pdb_out_buffer, length); - } - else if (pdb_ctx.process != -1) - { - pdb_linux_get_values(pdb_buffer, length, addr, - pdb_ctx.process, pdb_ctx.ptbr); - mem2hex (pdb_buffer, pdb_out_buffer, length); - } - else - { - pdb_get_values (pdb_buffer, length, - pdb_ctx.ptbr, addr); - mem2hex (pdb_buffer, pdb_out_buffer, length); - } - - pdb_page_fault_possible = 0; - if (pdb_page_fault) - { - strcpy (pdb_out_buffer, "E03"); - } - } - - if (ptr) - { - strcpy (pdb_out_buffer, "E01"); - } - break; - } - - /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ - case 'M': - { - /* TRY TO READ '%x,%x:'. 
IF SUCCEED, SET PTR = 0 */ - if (hexToInt (&ptr, (int *)&addr)) - if (*(ptr++) == ',') - if (hexToInt (&ptr, &length)) - if (*(ptr++) == ':') - { - - pdb_page_fault_possible = 1; - pdb_page_fault = 0; - if (addr >= PAGE_OFFSET) - { - hex2mem (ptr, (char *)addr, length); - pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); - } - else if (pdb_ctx.process != -1) - { - pdb_linux_set_values(ptr, length, addr, - pdb_ctx.process, - pdb_ctx.ptbr); - pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); - } - else - { - pdb_set_values (ptr, length, - pdb_ctx.ptbr, addr); - pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); - } - pdb_page_fault_possible = 0; - if (pdb_page_fault) - { - strcpy (pdb_out_buffer, "E03"); - } - else - { - strcpy (pdb_out_buffer, "OK"); - } - - ptr = 0; - } - if (ptr) - { - strcpy (pdb_out_buffer, "E02"); - } - break; - } - case 'T': - { - int id; - - if (hexToInt (&ptr, &id)) - { - strcpy (pdb_out_buffer, "E00"); - -#ifdef PDB_PAST - - switch (pdb_level) /* previous level */ - { - case PDB_LVL_XEN: - { - struct task_struct *p; - id -= PDB_ID_OFFSET; - if ( (p = find_domain_by_id(id)) == NULL) - strcpy (pdb_out_buffer, "E00"); - else - strcpy (pdb_out_buffer, "OK"); - put_task_struct(p); - - pdb_level = PDB_LVL_GUESTOS; - pdb_ctx[pdb_level].ctrl = id; - pdb_ctx[pdb_level].info = id; - break; - } - case PDB_LVL_GUESTOS: - { - if (pdb_level == -1) - { - pdb_level = PDB_LVL_XEN; - } - else - { - pdb_level = PDB_LVL_PROCESS; - pdb_ctx[pdb_level].ctrl = id; - pdb_ctx[pdb_level].info = id; - } - break; - } - case PDB_LVL_PROCESS: - { - if (pdb_level == -1) - { - pdb_level = PDB_LVL_GUESTOS; - } - break; - } - default: - { - printk ("pdb internal error: invalid level [%d]\n", - pdb_level); - } - } - -#endif /* PDB_PAST */ - } - break; - } - } - -exit: - /* reply to the request */ - pdb_put_packet (pdb_out_buffer, ack); - - return go; -} - -/* - * process an input character from the serial line. 
- * - * return "1" if the character is a gdb debug string - * (and hence shouldn't be further processed). - */ - -int pdb_debug_state = 0; /* small parser state machine */ - -int pdb_serial_input(u_char c, struct pt_regs *regs) -{ - int out = 1; - int loop, count; - unsigned long cr3; - - __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); - - switch (pdb_debug_state) - { - case 0: /* not currently processing debug string */ - if ( c == '$' ) /* start token */ - { - pdb_debug_state = 1; - pdb_in_buffer_ptr = 0; - pdb_in_checksum = 0; - pdb_xmit_checksum = 0; - } - else - { - out = 0; - } - break; - case 1: /* saw '$' */ - if ( c == '#' ) /* checksum token */ - { - pdb_debug_state = 2; - pdb_in_buffer[pdb_in_buffer_ptr] = 0; - } - else - { - pdb_in_checksum += c; - pdb_in_buffer[pdb_in_buffer_ptr++] = c; - } - break; - case 2: /* 1st checksum digit */ - pdb_xmit_checksum = hex(c) << 4; - pdb_debug_state = 3; - break; - case 3: /* 2nd checksum digit */ - pdb_xmit_checksum += hex(c); - if (pdb_in_checksum != pdb_xmit_checksum) - { - pdb_put_char('-'); /* checksum failure */ - printk ("checksum failure [%s.%02x.%02x]\n", pdb_in_buffer, - pdb_in_checksum, pdb_xmit_checksum); - } - else - { - pdb_put_char('+'); /* checksum okay */ - if ( pdb_in_buffer_ptr > 1 && pdb_in_buffer[2] == ':' ) - { - pdb_put_char(pdb_in_buffer[0]); - pdb_put_char(pdb_in_buffer[1]); - /* remove sequence chars from buffer */ - count = strlen(pdb_in_buffer); - for (loop = 3; loop < count; loop++) - pdb_in_buffer[loop - 3] = pdb_in_buffer[loop]; - } - - pdb_process_command (pdb_in_buffer, regs, cr3, - PDB_LIVE_EXCEPTION); - } - pdb_debug_state = 0; - break; - } - - return out; -} - -int hex(char ch) -{ - if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10); - if ((ch >= '0') && (ch <= '9')) return (ch-'0'); - if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10); - return (-1); -} - -/* convert the memory pointed to by mem into hex, placing result in buf */ -/* return a pointer to the last char put 
in buf (null) */ -char * -mem2hex (mem, buf, count) - char *mem; - char *buf; - int count; -{ - int i; - unsigned char ch; - - for (i = 0; i < count; i++) - { - ch = get_char (mem++); - *buf++ = hexchars[ch >> 4]; - *buf++ = hexchars[ch % 16]; - } - *buf = 0; - return (buf); -} - -/* convert the hex array pointed to by buf into binary to be placed in mem */ -/* return a pointer to the character AFTER the last byte written */ -char * -hex2mem (buf, mem, count) - char *buf; - char *mem; - int count; -{ - int i; - unsigned char ch; - - for (i = 0; i < count; i++) - { - ch = hex (*buf++) << 4; - ch = ch + hex (*buf++); - set_char (mem++, ch); - } - return (mem); -} - -int -hexToInt (char **ptr, int *intValue) -{ - int numChars = 0; - int hexValue; - int negative = 0; - - *intValue = 0; - - if (**ptr == '-') - { - negative = 1; - numChars++; - (*ptr)++; - } - - while (**ptr) - { - hexValue = hex (**ptr); - if (hexValue >= 0) - { - *intValue = (*intValue << 4) | hexValue; - numChars++; - } - else - break; - - (*ptr)++; - } - - if ( negative ) - *intValue *= -1; - - return (numChars); -} - -/***********************************************************************/ -/***********************************************************************/ - - -/* - * Add a breakpoint to the list of known breakpoints. - * For now there should only be two or three breakpoints so - * we use a simple linked list. In the future, maybe a red-black tree? - */ -struct pdb_breakpoint breakpoints; - -void pdb_bkpt_add (unsigned long cr3, unsigned long address) -{ - struct pdb_breakpoint *bkpt = kmalloc(sizeof(*bkpt), GFP_KERNEL); - bkpt->cr3 = cr3; - bkpt->address = address; - list_add(&bkpt->list, &breakpoints.list); -} - -/* - * Check to see of the breakpoint is in the list of known breakpoints - * Return 1 if it has been set, NULL otherwise. 
- */ -struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3, - unsigned long address) -{ - struct list_head *list_entry; - struct pdb_breakpoint *bkpt; - - list_for_each(list_entry, &breakpoints.list) - { - bkpt = list_entry(list_entry, struct pdb_breakpoint, list); - if ( bkpt->cr3 == cr3 && bkpt->address == address ) - return bkpt; - } - - return NULL; -} - -/* - * Remove a breakpoint to the list of known breakpoints. - * Return 1 if the element was not found, otherwise 0. - */ -int pdb_bkpt_remove (unsigned long cr3, unsigned long address) -{ - struct list_head *list_entry; - struct pdb_breakpoint *bkpt; - - list_for_each(list_entry, &breakpoints.list) - { - bkpt = list_entry(list_entry, struct pdb_breakpoint, list); - if ( bkpt->cr3 == cr3 && bkpt->address == address ) - { - list_del(&bkpt->list); - kfree(bkpt); - return 0; - } - } - - return 1; -} - -/* - * Check to see if a memory write is really gdb setting a breakpoint - */ -void pdb_bkpt_check (u_char *buffer, int length, - unsigned long cr3, unsigned long addr) -{ - if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c') - { - /* inserting a new breakpoint */ - pdb_bkpt_add(cr3, addr); - TRC(printk("pdb breakpoint detected at 0x%lx:0x%lx\n", cr3, addr)); - } - else if ( pdb_bkpt_remove(cr3, addr) == 0 ) - { - /* removing a breakpoint */ - TRC(printk("pdb breakpoint cleared at 0x%lx:0x%lx\n", cr3, addr)); - } -} - -/***********************************************************************/ - -int pdb_change_values(u_char *buffer, int length, - unsigned long cr3, unsigned long addr, int rw); -int pdb_change_values_one_page(u_char *buffer, int length, - unsigned long cr3, unsigned long addr, int rw); - -#define __PDB_GET_VAL 1 -#define __PDB_SET_VAL 2 - -/* - * Set memory in a domain's address space - * Set "length" bytes at "address" from "domain" to the values in "buffer". - * Return the number of bytes set, 0 if there was a problem. 
- */ - -int pdb_set_values(u_char *buffer, int length, - unsigned long cr3, unsigned long addr) -{ - int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL); - return count; -} - -/* - * Read memory from a domain's address space. - * Fetch "length" bytes at "address" from "domain" into "buffer". - * Return the number of bytes read, 0 if there was a problem. - */ - -int pdb_get_values(u_char *buffer, int length, - unsigned long cr3, unsigned long addr) -{ - return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL); -} - -/* - * Read or write memory in an address space - */ -int pdb_change_values(u_char *buffer, int length, - unsigned long cr3, unsigned long addr, int rw) -{ - int remaining; /* number of bytes to touch past this page */ - int bytes = 0; - - while ( (remaining = (addr + length - 1) - (addr | (PAGE_SIZE - 1))) > 0) - { - bytes += pdb_change_values_one_page(buffer, length - remaining, - cr3, addr, rw); - buffer = buffer + (2 * (length - remaining)); - length = remaining; - addr = (addr | (PAGE_SIZE - 1)) + 1; - } - - bytes += pdb_change_values_one_page(buffer, length, cr3, addr, rw); - return bytes; -} - -/* - * Change memory in a process' address space in one page - * Read or write "length" bytes at "address" into/from "buffer" - * from the virtual address space referenced by "cr3". - * Return the number of bytes read, 0 if there was a problem. 
- */ - -int pdb_change_values_one_page(u_char *buffer, int length, - unsigned long cr3, unsigned long addr, int rw) -{ - l2_pgentry_t* l2_table = NULL; - l1_pgentry_t* l1_table = NULL; - u_char *page; - int bytes = 0; - - l2_table = map_domain_mem(cr3); - l2_table += l2_table_offset(addr); - if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT)) - { - if (pdb_page_fault_possible == 1) - { - pdb_page_fault = 1; - TRC(printk("pdb: L2 error (0x%lx)\n", addr)); - } - else - { - struct task_struct *p = find_domain_by_id(0); - printk ("pdb error: cr3: 0x%lx dom0cr3: 0x%lx\n", cr3, - p->mm.shadow_mode ? pagetable_val(p->mm.shadow_table) - : pagetable_val(p->mm.pagetable)); - put_task_struct(p); - printk ("pdb error: L2:0x%p (0x%lx)\n", - l2_table, l2_pgentry_val(*l2_table)); - } - goto exit2; - } - - if (l2_pgentry_val(*l2_table) & _PAGE_PSE) - { -#define PSE_PAGE_SHIFT L2_PAGETABLE_SHIFT -#define PSE_PAGE_SIZE (1UL << PSE_PAGE_SHIFT) -#define PSE_PAGE_MASK (~(PSE_PAGE_SIZE-1)) - -#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT ) - -#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK) - - page = map_domain_mem(pse_pgentry_to_phys(*l2_table) + /* 10 bits */ - (addr & L1_PAGE_BITS)); /* 10 bits */ - page += addr & (PAGE_SIZE - 1); /* 12 bits */ - } - else - { - l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table)); - l1_table += l1_table_offset(addr); - if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT)) - { - if (pdb_page_fault_possible == 1) - { - pdb_page_fault = 1; - TRC(printk ("pdb: L1 error (0x%lx)\n", addr)); - } - else - { - printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n", - l2_table, l2_pgentry_val(*l2_table), - l1_table, l1_pgentry_val(*l1_table)); - } - goto exit1; - } - - page = map_domain_mem(l1_pgentry_to_phys(*l1_table)); - page += addr & (PAGE_SIZE - 1); - } - - switch (rw) - { - case __PDB_GET_VAL: /* read */ - memcpy (buffer, page, length); - bytes = length; - break; - case __PDB_SET_VAL: /* write */ - hex2mem (buffer, 
page, length); - bytes = length; - break; - default: /* unknown */ - printk ("error: unknown RW flag: %d\n", rw); - return 0; - } - - unmap_domain_mem((void *)page); -exit1: - if (l1_table != NULL) - unmap_domain_mem((void *)l1_table); -exit2: - unmap_domain_mem((void *)l2_table); - - return bytes; -} - -/***********************************************************************/ - -void breakpoint(void); - -/* send the packet in buffer. */ -void pdb_put_packet (unsigned char *buffer, int ack) -{ - unsigned char checksum; - int count; - char ch; - - /* $# */ - /* do */ - { - pdb_put_char ('$'); - checksum = 0; - count = 0; - - while ((ch = buffer[count])) - { - pdb_put_char (ch); - checksum += ch; - count += 1; - } - - pdb_put_char('#'); - pdb_put_char(hexchars[checksum >> 4]); - pdb_put_char(hexchars[checksum % 16]); - } - - if (ack) - { - if ((ch = pdb_get_char()) != '+') - { - printk(" pdb return error: %c 0x%x [%s]\n", ch, ch, buffer); - } - } -} - -void pdb_get_packet(char *buffer) -{ - int count; - char ch; - unsigned char checksum = 0; - unsigned char xmitcsum = 0; - - do - { - while ((ch = pdb_get_char()) != '$'); - - count = 0; - checksum = 0; - - while (count < BUFMAX) - { - ch = pdb_get_char(); - if (ch == '#') break; - checksum += ch; - buffer[count] = ch; - count++; - } - buffer[count] = 0; - - if (ch == '#') - { - xmitcsum = hex(pdb_get_char()) << 4; - xmitcsum += hex(pdb_get_char()); - - if (xmitcsum == checksum) - { - pdb_put_char('+'); - if (buffer[2] == ':') - { - printk ("pdb: obsolete gdb packet (sequence ID)\n"); - } - } - else - { - pdb_put_char('-'); - } - } - } while (checksum != xmitcsum); - - return; -} - -/* - * process a machine interrupt or exception - * Return 1 if pdb is not interested in the exception; it should - * be propagated to the guest os. 
- */ - -int pdb_handle_exception(int exceptionVector, - struct pt_regs *xen_regs) -{ - int signal = 0; - struct pdb_breakpoint* bkpt; - int watchdog_save; - unsigned long cr3; - - __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); - - /* If the exception is an int3 from user space then pdb is only - interested if it re-wrote an instruction set the breakpoint. - This occurs when leaving a system call from a domain. - */ - if ( exceptionVector == 3 && - (xen_regs->xcs & 3) == 3 && - xen_regs->eip != pdb_system_call_next_addr + 1) - { - TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%lx\n", - exceptionVector, xen_regs->xcs & 3, cr3, xen_regs->eip)); - return 1; - } - - /* - * If PDB didn't set the breakpoint, is not single stepping, - * is not entering a system call in a domain, - * the user didn't press the magic debug key, - * then we don't handle the exception. - */ - bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1); - if ( (bkpt == NULL) && - !pdb_stepping && - !pdb_system_call && - xen_regs->eip != pdb_system_call_next_addr + 1 && - (exceptionVector != KEYPRESS_EXCEPTION) && - xen_regs->eip < 0xc0000000) /* Linux-specific for now! */ - { - TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%lx\n", - exceptionVector, cr3, xen_regs->eip)); - return 1; - } - - printk("pdb_handle_exception [0x%x][0x%lx:0x%lx]\n", - exceptionVector, cr3, xen_regs->eip); - - if ( pdb_stepping ) - { - /* Stepped one instruction; now return to normal execution. 
*/ - xen_regs->eflags &= ~X86_EFLAGS_TF; - pdb_stepping = 0; - } - - if ( pdb_system_call ) - { - pdb_system_call = 0; - - pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx); - - /* we don't have a saved breakpoint so we need to rewind eip */ - xen_regs->eip--; - - /* if ther user doesn't care about breaking when entering a - system call then we'll just ignore the exception */ - if ( (pdb_ctx.system_call & 0x01) == 0 ) - { - return 0; - } - } - - if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL) - { - /* Executed Int3: replace breakpoint byte with real program byte. */ - xen_regs->eip--; - } - - /* returning to user space after a system call */ - if ( xen_regs->eip == pdb_system_call_next_addr + 1) - { - u_char instr[2]; /* REALLY REALLY REALLY STUPID */ - - mem2hex (&pdb_system_call_leave_instr, instr, sizeof(instr)); - - pdb_linux_set_values (instr, 1, pdb_system_call_next_addr, - pdb_ctx.process, pdb_ctx.ptbr); - - pdb_system_call_next_addr = 0; - pdb_system_call_leave_instr = 0; - - /* manually rewind eip */ - xen_regs->eip--; - - /* if the user doesn't care about breaking when returning - to user space after a system call then we'll just ignore - the exception */ - if ( (pdb_ctx.system_call & 0x02) == 0 ) - { - return 0; - } - } - - /* Generate a signal for GDB. 
*/ - switch ( exceptionVector ) - { - case KEYPRESS_EXCEPTION: - signal = 2; break; /* SIGINT */ - case DEBUG_EXCEPTION: - signal = 5; break; /* SIGTRAP */ - case BREAKPT_EXCEPTION: - signal = 5; break; /* SIGTRAP */ - default: - printk("pdb: can't generate signal for unknown exception vector %d\n", - exceptionVector); - break; - } - - pdb_out_buffer[0] = 'S'; - pdb_out_buffer[1] = hexchars[signal >> 4]; - pdb_out_buffer[2] = hexchars[signal % 16]; - pdb_out_buffer[3] = 0; - pdb_put_packet(pdb_out_buffer, 1); - - watchdog_save = watchdog_on; - watchdog_on = 0; - - do { - pdb_out_buffer[0] = 0; - pdb_get_packet(pdb_in_buffer); - } - while ( pdb_process_command(pdb_in_buffer, xen_regs, cr3, signal) == 0 ); - - watchdog_on = watchdog_save; - - return 0; -} - -void pdb_key_pressed(u_char key, void *dev_id, struct pt_regs *regs) -{ - pdb_handle_exception(KEYPRESS_EXCEPTION, regs); - return; -} - -void initialize_pdb() -{ - extern char opt_pdb[]; - - /* Certain state must be initialised even when PDB will not be used. */ - memset((void *) &breakpoints, 0, sizeof(breakpoints)); - INIT_LIST_HEAD(&breakpoints.list); - pdb_stepping = 0; - - if ( strcmp(opt_pdb, "none") == 0 ) - return; - - if ( (pdb_serhnd = parse_serial_handle(opt_pdb)) == -1 ) - { - printk("error: failed to initialize PDB on port %s\n", opt_pdb); - return; - } - - pdb_ctx.valid = 1; - pdb_ctx.domain = -1; - pdb_ctx.process = -1; - pdb_ctx.system_call = 0; - pdb_ctx.ptbr = 0; - - printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n", - opt_pdb); - - /* Acknowledge any spurious GDB packets. 
*/ - pdb_put_char('+'); - - add_key_handler('D', pdb_key_pressed, "enter pervasive debugger"); - - pdb_initialized = 1; -} - -void breakpoint(void) -{ - if ( pdb_initialized ) - asm("int $3"); -} diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c deleted file mode 100644 index 7524e9c5c5..0000000000 --- a/xen/arch/i386/process.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * linux/arch/i386/kernel/process.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#define __KERNEL_SYSCALLS__ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int hlt_counter; - -void disable_hlt(void) -{ - hlt_counter++; -} - -void enable_hlt(void) -{ - hlt_counter--; -} - -/* - * We use this if we don't have any better - * idle routine.. - */ -static void default_idle(void) -{ - if (!hlt_counter) { - __cli(); - if (!current->hyp_events && !softirq_pending(smp_processor_id())) - safe_halt(); - else - __sti(); - } -} - -void continue_cpu_idle_loop(void) -{ - int cpu = smp_processor_id(); - for ( ; ; ) - { - irq_stat[cpu].idle_timestamp = jiffies; - while (!current->hyp_events && !softirq_pending(cpu)) - default_idle(); - do_hyp_events(); - do_softirq(); - } -} - -void startup_cpu_idle_loop(void) -{ - /* Just some sanity to ensure that the scheduler is set up okay. */ - ASSERT(current->domain == IDLE_DOMAIN_ID); - (void)wake_up(current); - __enter_scheduler(); - - /* - * Declares CPU setup done to the boot processor. - * Therefore memory barrier to ensure state is visible. 
- */ - smp_mb(); - init_idle(); - - continue_cpu_idle_loop(); -} - -static long no_idt[2]; -static int reboot_mode; -int reboot_thru_bios = 0; - -#ifdef CONFIG_SMP -int reboot_smp = 0; -static int reboot_cpu = -1; -/* shamelessly grabbed from lib/vsprintf.c for readability */ -#define is_digit(c) ((c) >= '0' && (c) <= '9') -#endif - - -static inline void kb_wait(void) -{ - int i; - - for (i=0; i<0x10000; i++) - if ((inb_p(0x64) & 0x02) == 0) - break; -} - - -void machine_restart(char * __unused) -{ - extern int opt_noreboot; -#ifdef CONFIG_SMP - int cpuid; -#endif - - if ( opt_noreboot ) - { - printk("Reboot disabled on cmdline: require manual reset\n"); - for ( ; ; ) __asm__ __volatile__ ("hlt"); - } - -#ifdef CONFIG_SMP - cpuid = GET_APIC_ID(apic_read(APIC_ID)); - - /* KAF: Need interrupts enabled for safe IPI. */ - __sti(); - - if (reboot_smp) { - - /* check to see if reboot_cpu is valid - if its not, default to the BSP */ - if ((reboot_cpu == -1) || - (reboot_cpu > (NR_CPUS -1)) || - !(phys_cpu_present_map & (1<shared_info->execution_context; - - /* - * Initial register values: - * DS,ES,FS,GS = FLAT_RING1_DS - * CS:EIP = FLAT_RING1_CS:start_pc - * SS:ESP = FLAT_RING1_DS:start_stack - * ESI = start_info - * [EAX,EBX,ECX,EDX,EDI,EBP are zero] - */ - ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS; - ec->cs = FLAT_RING1_CS; - ec->eip = start_pc; - ec->esp = start_stack; - ec->esi = start_info; - - __save_flags(ec->eflags); - ec->eflags |= X86_EFLAGS_IF; - - /* No fast trap at start of day. 
*/ - SET_DEFAULT_FAST_TRAP(&p->thread); -} - - -/* - * This special macro can be used to load a debugging register - */ -#define loaddebug(thread,register) \ - __asm__("movl %0,%%db" #register \ - : /* no output */ \ - :"r" (thread->debugreg[register])) - - -void switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *next = &next_p->thread; - struct tss_struct *tss = init_tss + smp_processor_id(); - execution_context_t *stack_ec = get_execution_context(); - int i; - - __cli(); - - /* Switch guest general-register state. */ - if ( !is_idle_task(prev_p) ) - { - memcpy(&prev_p->shared_info->execution_context, - stack_ec, - sizeof(*stack_ec)); - unlazy_fpu(prev_p); - CLEAR_FAST_TRAP(&prev_p->thread); - } - - if ( !is_idle_task(next_p) ) - { - memcpy(stack_ec, - &next_p->shared_info->execution_context, - sizeof(*stack_ec)); - - /* - * This is sufficient! If the descriptor DPL differs from CS RPL then - * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared - * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP. - */ - if ( (stack_ec->cs & 3) == 0 ) - stack_ec->cs = FLAT_RING1_CS; - if ( (stack_ec->ss & 3) == 0 ) - stack_ec->ss = FLAT_RING1_DS; - - SET_FAST_TRAP(&next_p->thread); - - /* Switch the guest OS ring-1 stack. */ - tss->esp1 = next->guestos_sp; - tss->ss1 = next->guestos_ss; - - /* Maybe switch the debug registers. */ - if ( unlikely(next->debugreg[7]) ) - { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); - /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); - } - - /* Switch page tables. */ - write_ptbase(&next_p->mm); - tlb_clocktick(); - } - - if ( unlikely(prev_p->io_bitmap != NULL) || - unlikely(next_p->io_bitmap != NULL) ) - { - if ( next_p->io_bitmap != NULL ) - { - /* Copy in the appropriate parts of the IO bitmap. We use the - * selector to copy only the interesting parts of the bitmap. 
*/ - - u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */ - - if ( prev_p->io_bitmap != NULL) - { - old_sel = prev_p->io_bitmap_sel; - - /* Replace any areas of the IO bitmap that had bits cleared. */ - for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) - if ( !test_bit(i, &prev_p->io_bitmap_sel) ) - memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], - &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], - IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); - } - - /* Copy in any regions of the new task's bitmap that have bits - * clear and we haven't already dealt with. */ - for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) - { - if ( test_bit(i, &old_sel) - && !test_bit(i, &next_p->io_bitmap_sel) ) - memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], - &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], - IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); - } - - tss->bitmap = IO_BITMAP_OFFSET; - - } - else - { - /* In this case, we're switching FROM a task with IO port access, - * to a task that doesn't use the IO bitmap. We set any TSS bits - * that might have been cleared, ready for future use. */ - for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) - if ( !test_bit(i, &prev_p->io_bitmap_sel) ) - memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], - 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); - - /* - * a bitmap offset pointing outside of the TSS limit - * causes a nicely controllable SIGSEGV if a process - * tries to use a port IO instruction. The first - * sys_ioperm() call sets up the bitmap properly. - */ - tss->bitmap = INVALID_IO_BITMAP_OFFSET; - } - } - - set_current(next_p); - - /* Switch GDT and LDT. */ - __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt)); - load_LDT(next_p); - - __sti(); -} - - -/* XXX Currently the 'domain' field is ignored! 
XXX */ -long do_iopl(domid_t domain, unsigned int new_io_pl) -{ - execution_context_t *ec = get_execution_context(); - ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12); - return 0; -} diff --git a/xen/arch/i386/rwlock.c b/xen/arch/i386/rwlock.c deleted file mode 100644 index 2ef7af16b1..0000000000 --- a/xen/arch/i386/rwlock.c +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -#if defined(CONFIG_SMP) -asm( -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n" -" " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n" -" cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n" -" jne 1b\n" -" " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n" -" jnz __write_lock_failed\n" -" ret\n" - -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n" -" lock ; incl (%eax)\n" -"1: rep; nop\n" -" cmpl $1,(%eax)\n" -" js 1b\n" -" lock ; decl (%eax)\n" -" js __read_lock_failed\n" -" ret\n" -); -#endif diff --git a/xen/arch/i386/setup.c b/xen/arch/i386/setup.c deleted file mode 100644 index 70610339b3..0000000000 --- a/xen/arch/i386/setup.c +++ /dev/null @@ -1,450 +0,0 @@ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -char ignore_irq13; /* set if exception 16 works */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; - -/* Lots of nice things, since we only target PPro+. 
*/ -unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE; -EXPORT_SYMBOL(mmu_cr4_features); - -unsigned long wait_init_idle; - -struct task_struct *idle_task[NR_CPUS] = { &idle0_task }; - -#ifdef CONFIG_ACPI_INTERPRETER -int acpi_disabled = 0; -#else -int acpi_disabled = 1; -#endif -EXPORT_SYMBOL(acpi_disabled); - -#ifdef CONFIG_ACPI_BOOT -extern int __initdata acpi_ht; -int acpi_force __initdata = 0; -#endif - -int phys_proc_id[NR_CPUS]; -int logical_proc_id[NR_CPUS]; - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -void __init get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - - if (!strcmp(v, "GenuineIntel")) - c->x86_vendor = X86_VENDOR_INTEL; - else if (!strcmp(v, "AuthenticAMD")) - c->x86_vendor = X86_VENDOR_AMD; - else if (!strcmp(v, "CyrixInstead")) - c->x86_vendor = X86_VENDOR_CYRIX; - else if (!strcmp(v, "UMC UMC UMC ")) - c->x86_vendor = X86_VENDOR_UMC; - else if (!strcmp(v, "CentaurHauls")) - c->x86_vendor = X86_VENDOR_CENTAUR; - else if (!strcmp(v, "NexGenDriven")) - c->x86_vendor = X86_VENDOR_NEXGEN; - else if (!strcmp(v, "RiseRiseRise")) - c->x86_vendor = X86_VENDOR_RISE; - else if (!strcmp(v, "GenuineTMx86") || - !strcmp(v, "TransmetaCPU")) - c->x86_vendor = X86_VENDOR_TRANSMETA; - else - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -static void __init init_intel(struct cpuinfo_x86 *c) -{ - extern int opt_noht, opt_noacpi; - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ - if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) - 
clear_bit(X86_FEATURE_SEP, &c->x86_capability); - - if ( opt_noht ) - { - opt_noacpi = 1; /* Virtual CPUs only appear in ACPI tables. */ - clear_bit(X86_FEATURE_HT, &c->x86_capability[0]); - } - -#ifdef CONFIG_SMP - if ( test_bit(X86_FEATURE_HT, &c->x86_capability) ) - { - u32 eax, ebx, ecx, edx; - int initial_apic_id, siblings, cpu = smp_processor_id(); - - cpuid(1, &eax, &ebx, &ecx, &edx); - siblings = (ebx & 0xff0000) >> 16; - - if ( siblings <= 1 ) - { - printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu); - } - else if ( siblings > 2 ) - { - panic("We don't support more than two logical CPUs per package!"); - } - else - { - initial_apic_id = ebx >> 24 & 0xff; - phys_proc_id[cpu] = initial_apic_id >> 1; - logical_proc_id[cpu] = initial_apic_id & 1; - printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n", - cpu, phys_proc_id[cpu], logical_proc_id[cpu]); - } - } -#endif -} - -static void __init init_amd(struct cpuinfo_x86 *c) -{ - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, &c->x86_capability); - - switch(c->x86) - { - case 5: - panic("AMD K6 is not supported.\n"); - case 6: /* An Athlon/Duron. We can trust the BIOS probably */ - break; - } -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -void __init identify_cpu(struct cpuinfo_x86 *c) -{ - int junk, i, cpu = smp_processor_id(); - u32 xlvl, tfms; - - phys_proc_id[cpu] = cpu; - logical_proc_id[cpu] = 0; - - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ - c->x86_vendor_id[0] = '\0'; /* Unset */ - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if ( !have_cpuid_p() ) - panic("Ancient processors not supported\n"); - - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - - if ( c->cpuid_level == 0 ) - panic("Decrepit CPUID not supported\n"); - - cpuid(0x00000001, &tfms, &junk, &junk, - &c->x86_capability[0]); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - c->x86_mask = tfms & 15; - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) - c->x86_capability[1] = cpuid_edx(0x80000001); - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", - smp_processor_id(), - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_vendor); - - switch ( c->x86_vendor ) { - case X86_VENDOR_INTEL: - init_intel(c); - break; - case X86_VENDOR_AMD: - init_amd(c); - break; - case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */ - break; - case X86_VENDOR_CENTAUR: - break; - default: - printk("Unknown CPU identifier (%d): continuing anyway, " - "but might fail.\n", c->x86_vendor); - } - - printk("CPU caps: %08x %08x %08x %08x\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. 
- */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } -} - - -unsigned long cpu_initialized; -void __init cpu_init(void) -{ - int nr = smp_processor_id(); - struct tss_struct * t = &init_tss[nr]; - - if ( test_and_set_bit(nr, &cpu_initialized) ) - panic("CPU#%d already initialized!!!\n", nr); - printk("Initializing CPU#%d\n", nr); - - /* Set up GDT and IDT. */ - SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES); - SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS); - __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt)); - __asm__ __volatile__("lidt %0": "=m" (idt_descr)); - - /* No nested task. */ - __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); - - /* Ensure FPU gets initialised for each domain. */ - stts(); - - /* Set up and load the per-CPU TSS and LDT. */ - t->ss0 = __HYPERVISOR_DS; - t->esp0 = get_stack_top(); - set_tss_desc(nr,t); - load_TR(nr); - __asm__ __volatile__("lldt %%ax"::"a" (0)); - - /* Clear all 6 debug registers. */ -#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); - CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); -#undef CD - - /* Install correct page table. */ - write_ptbase(¤t->mm); - - init_idle_task(); -} - -static void __init do_initcalls(void) -{ - initcall_t *call; - for ( call = &__initcall_start; call < &__initcall_end; call++ ) - (*call)(); -} - -/* - * IBM-compatible BIOSes place drive info tables at initial interrupt - * vectors 0x41 and 0x46. These are in the for of 16-bit-mode far ptrs. 
- */ -struct drive_info_struct { unsigned char dummy[32]; } drive_info; -void get_bios_driveinfo(void) -{ - unsigned long seg, off, tab1, tab2; - - off = (unsigned long)*(unsigned short *)(4*0x41+0); - seg = (unsigned long)*(unsigned short *)(4*0x41+2); - tab1 = (seg<<4) + off; - - off = (unsigned long)*(unsigned short *)(4*0x46+0); - seg = (unsigned long)*(unsigned short *)(4*0x46+2); - tab2 = (seg<<4) + off; - - printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n", - tab1, tab2); - - memcpy(drive_info.dummy+ 0, (char *)tab1, 16); - memcpy(drive_info.dummy+16, (char *)tab2, 16); -} - - -unsigned long pci_mem_start = 0x10000000; - -void __init start_of_day(void) -{ - extern void trap_init(void); - extern void init_IRQ(void); - extern void time_init(void); - extern void timer_bh(void); - extern void init_timervecs(void); - extern void ac_timer_init(void); - extern void initialize_keytable(); - extern void initialize_keyboard(void); - extern int opt_nosmp, opt_watchdog, opt_noacpi, opt_ignorebiostables; - extern int do_timer_lists_from_pit; - unsigned long low_mem_size; - -#ifdef MEMORY_GUARD - /* Unmap the first page of CPU0's stack. */ - extern unsigned long cpu0_stack[]; - memguard_guard_range(cpu0_stack, PAGE_SIZE); -#endif - - open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, - (void *)new_tlbflush_clock_period, - NULL); - - if ( opt_watchdog ) - nmi_watchdog = NMI_LOCAL_APIC; - - /* - * We do this early, but tables are in the lowest 1MB (usually - * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered. - */ - get_bios_driveinfo(); - - /* Tell the PCI layer not to allocate too close to the RAM area.. 
*/ - low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff; - if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size; - - identify_cpu(&boot_cpu_data); /* get CPU type info */ - if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR); - if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT); -#ifdef CONFIG_SMP - if ( opt_ignorebiostables ) - { - opt_nosmp = 1; /* No SMP without configuration */ - opt_noacpi = 1; /* ACPI will just confuse matters also */ - } - else - { - find_smp_config(); - smp_alloc_memory(); /* trampoline which other CPUs jump at */ - } -#endif - paging_init(); /* not much here now, but sets up fixmap */ - if ( !opt_noacpi ) - acpi_boot_init(); -#ifdef CONFIG_SMP - if ( smp_found_config ) - get_smp_config(); -#endif - domain_init(); - scheduler_init(); - trap_init(); - init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */ - time_init(); /* installs software handler for HZ clock. */ - softirq_init(); - init_timervecs(); - init_bh(TIMER_BH, timer_bh); - init_apic_mappings(); /* make APICs addressable in our pagetables. */ - -#ifndef CONFIG_SMP - APIC_init_uniprocessor(); -#else - if ( opt_nosmp ) - APIC_init_uniprocessor(); - else - smp_boot_cpus(); - /* - * Does loads of stuff, including kicking the local - * APIC, and the IO APIC after other CPUs are booted. - * Each IRQ is preferably handled by IO-APIC, but - * fall thru to 8259A if we have to (but slower). 
- */ -#endif - - __sti(); - - initialize_keytable(); /* call back handling for key codes */ - - serial_init_stage2(); - initialize_keyboard(); /* setup keyboard (also for debugging) */ - -#ifdef XEN_DEBUGGER - initialize_pdb(); /* pervasive debugger */ -#endif - - if ( !cpu_has_apic ) - { - do_timer_lists_from_pit = 1; - if ( smp_num_cpus != 1 ) - panic("We need local APICs on SMP machines!"); - } - - ac_timer_init(); /* init accurate timers */ - init_xen_time(); /* initialise the time */ - schedulers_start(); /* start scheduler for each CPU */ - - check_nmi_watchdog(); - -#ifdef CONFIG_PCI - pci_init(); -#endif - do_initcalls(); - -#ifdef CONFIG_SMP - wait_init_idle = cpu_online_map; - clear_bit(smp_processor_id(), &wait_init_idle); - smp_threads_ready = 1; - smp_commence(); /* Tell other CPUs that state of the world is stable. */ - while (wait_init_idle) - { - cpu_relax(); - barrier(); - } -#endif - - watchdog_on = 1; -} diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c deleted file mode 100644 index 3c1082683f..0000000000 --- a/xen/arch/i386/smp.c +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Intel SMP support routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar - * - * This code is released under the GNU General Public License version 2 or - * later. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP - -/* - * Some notes on x86 processor bugs affecting SMP operation: - * - * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. - * The Linux implications for SMP are handled as follows: - * - * Pentium III / [Xeon] - * None of the E1AP-E3AP errata are visible to the user. - * - * E1AP. see PII A1AP - * E2AP. see PII A2AP - * E3AP. see PII A3AP - * - * Pentium II / [Xeon] - * None of the A1AP-A3AP errata are visible to the user. - * - * A1AP. see PPro 1AP - * A2AP. see PPro 2AP - * A3AP. 
see PPro 7AP - * - * Pentium Pro - * None of 1AP-9AP errata are visible to the normal user, - * except occasional delivery of 'spurious interrupt' as trap #15. - * This is very rare and a non-problem. - * - * 1AP. Linux maps APIC as non-cacheable - * 2AP. worked around in hardware - * 3AP. fixed in C0 and above steppings microcode update. - * Linux does not use excessive STARTUP_IPIs. - * 4AP. worked around in hardware - * 5AP. symmetric IO mode (normal Linux operation) not affected. - * 'noapic' mode has vector 0xf filled out properly. - * 6AP. 'noapic' mode might be affected - fixed in later steppings - * 7AP. We do not assume writes to the LVT deassering IRQs - * 8AP. We do not enable low power mode (deep sleep) during MP bootup - * 9AP. We do not use mixed mode - */ - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline int __prepare_ICR (unsigned int shortcut, int vector) -{ - return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; -} - -static inline int __prepare_ICR2 (unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. 
- */ - apic_write_around(APIC_ICR, cfg); -} - -void send_IPI_self(int vector) -{ - __send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -static inline void send_IPI_mask(int mask, int vector) -{ - unsigned long cfg; - unsigned long flags; - - __save_flags(flags); - __cli(); - - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - apic_write_around(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); - - __restore_flags(flags); -} - -static inline void send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then - * we get an APIC send error if we try to broadcast. - * thus we have to avoid sending IPIs in this case. - */ - if (!(smp_num_cpus > 1)) - return; - - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -/* - * ********* XEN NOTICE ********** - * I've left the following comments lying around as they look liek they might - * be useful to get multiprocessor guest OSes going. However, I suspect the - * issues we face will be quite different so I've ripped out all the - * TLBSTATE logic (I didn't understand it anyway :-). These comments do - * not apply to Xen, therefore! -- Keir (8th Oct 2003). - */ -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask); - * Stop ipi delivery for the old mm. 
This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superflous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - * - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. 
- */ - -static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; -volatile unsigned long flush_cpumask; - -asmlinkage void smp_invalidate_interrupt(void) -{ - ack_APIC_irq(); - perfc_incrc(ipis); - if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) ) - local_flush_tlb(); -} - -void flush_tlb_mask(unsigned long mask) -{ - ASSERT(!in_irq()); - - if ( mask & (1 << smp_processor_id()) ) - { - local_flush_tlb(); - mask &= ~(1 << smp_processor_id()); - } - - if ( mask != 0 ) - { - /* - * We are certainly not reentering a flush_lock region on this CPU - * because we are not in an IRQ context. We can therefore wait for the - * other guy to release the lock. This is harder than it sounds because - * local interrupts might be disabled, and he may be waiting for us to - * execute smp_invalidate_interrupt(). We deal with this possibility by - * inlining the meat of that function here. - */ - while ( unlikely(!spin_trylock(&flush_lock)) ) - { - if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) ) - local_flush_tlb(); - rep_nop(); - } - - flush_cpumask = mask; - send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); - while ( flush_cpumask != 0 ) - { - rep_nop(); - barrier(); - } - - spin_unlock(&flush_lock); - } -} - -/* - * NB. Must be called with no locks held and interrupts enabled. - * (e.g., softirq context). - */ -void new_tlbflush_clock_period(void) -{ - spin_lock(&flush_lock); - - /* Someone may acquire the lock and execute the flush before us. */ - if ( ((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) != 0 ) - goto out; - - if ( smp_num_cpus > 1 ) - { - /* Flush everyone else. We definitely flushed just before entry. */ - flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id()); - send_IPI_allbutself(INVALIDATE_TLB_VECTOR); - while ( flush_cpumask != 0 ) - { - rep_nop(); - barrier(); - } - } - - /* No need for atomicity: we are the only possible updater. 
*/ - tlbflush_clock++; - - out: - spin_unlock(&flush_lock); -} - -static void flush_tlb_all_pge_ipi(void* info) -{ - __flush_tlb_pge(); -} - -void flush_tlb_all_pge(void) -{ - smp_call_function (flush_tlb_all_pge_ipi,0,1,1); - __flush_tlb_pge(); -} - -void smp_send_event_check_mask(unsigned long cpu_mask) -{ - send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR); -} - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. - */ -static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -static struct call_data_struct * call_data; - -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ - -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -/* - * [SUMMARY] Run a function on all other CPUs. - * The function to run. This must be fast and non-blocking. - * An arbitrary pointer to pass to the function. - * currently unused. - * If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute <> or are or have executed. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler, or bottom halfs. 
- */ -{ - struct call_data_struct data; - int cpus = smp_num_cpus-1; - - if (!cpus) - return 0; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - ASSERT(local_irq_is_enabled()); - - spin_lock(&call_lock); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - barrier(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - barrier(); - - spin_unlock(&call_lock); - - return 0; -} - -static void stop_this_cpu (void * dummy) -{ - /* - * Remove this CPU: - */ - clear_bit(smp_processor_id(), &cpu_online_map); - __cli(); - disable_local_APIC(); - for(;;) __asm__("hlt"); -} - -/* - * this function calls the 'stop' function on all other CPUs in the system. - */ - -void smp_send_stop(void) -{ - smp_call_function(stop_this_cpu, NULL, 1, 0); - smp_num_cpus = 1; - - __cli(); - disable_local_APIC(); - __sti(); -} - -/* - * Nothing to do, as all the work is done automatically when - * we return from the interrupt. 
- */ -asmlinkage void smp_event_check_interrupt(void) -{ - ack_APIC_irq(); - perfc_incrc(ipis); -} - -asmlinkage void smp_call_function_interrupt(void) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - ack_APIC_irq(); - perfc_incrc(ipis); - - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - (*func)(info); - if (wait) { - mb(); - atomic_inc(&call_data->finished); - } -} - -#endif /* CONFIG_SMP */ diff --git a/xen/arch/i386/smpboot.c b/xen/arch/i386/smpboot.c deleted file mode 100644 index e91f95d73f..0000000000 --- a/xen/arch/i386/smpboot.c +++ /dev/null @@ -1,950 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 or - * later. - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIP report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. 
Rozycki : Bits for genuine 82489DX APICs - * Martin J. Bligh : Added support for multi-quad systems - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP - -/* Set if we find a B stepping CPU */ -static int smp_b_stepping; - -/* Setup configured maximum number of CPUs to activate */ -static int max_cpus = -1; - -/* Total count of live CPUs */ -int smp_num_cpus = 1; - -/* Bitmask of currently online CPUs */ -unsigned long cpu_online_map; - -static volatile unsigned long cpu_callin_map; -static volatile unsigned long cpu_callout_map; - -/* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; - -/* Set when the idlers are all forked */ -int smp_threads_ready; - -/* - * Trampoline 80x86 program as an array. - */ - -extern unsigned char trampoline_data []; -extern unsigned char trampoline_end []; -static unsigned char *trampoline_base; - -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ - -static unsigned long __init setup_trampoline(void) -{ - memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(trampoline_base); -} - -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - trampoline_base = __va(0x90000); -} - -/* - * The bootstrap kernel entry code has set these up. 
Save them for - * a given CPU - */ - -void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = cpu_data + id; - - *c = boot_cpu_data; - c->pte_quick = 0; - c->pmd_quick = 0; - c->pgd_quick = 0; - c->pgtable_cache_sz = 0; - identify_cpu(c); - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - /* - * Remember we have B step Pentia with bugs - */ - smp_b_stepping = 1; -} - -/* - * Architecture specific routine called by the kernel just before init is - * fired off. This allows the BP to have everything in order [we hope]. - * At the end of this all the APs will hit the system scheduling and off - * we go. Each AP will load the system gdt's and jump through the kernel - * init into idle(). At this point the scheduler will one day take over - * and give them jobs to do. smp_callin is a standard routine - * we use to track CPUs as they power up. - */ - -static atomic_t smp_commenced = ATOMIC_INIT(0); - -void __init smp_commence(void) -{ - /* - * Lets the callins below out of their loop. - */ - Dprintk("Setting commenced=1, go go go\n"); - - wmb(); - atomic_set(&smp_commenced,1); -} - -/* - * TSC synchronization. - * - * We first check wether all CPUs have their TSC's synchronized, - * then we print a warning if not, and always resync. - */ - -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; - -#define NR_LOOPS 5 - -/* - * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit - * multiplication. Not terribly optimized but we need it at boot time only - * anyway. 
- * - * result == a / b - * == (a1 + a2*(2^32)) / b - * == a1/b + a2*(2^32/b) - * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b - * ^---- (this multiplication can overflow) - */ - -static unsigned long long div64 (unsigned long long a, unsigned long b0) -{ - unsigned int a1, a2; - unsigned long long res; - - a1 = ((unsigned int*)&a)[0]; - a2 = ((unsigned int*)&a)[1]; - - res = a1/b0 + - (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) + - a2 / b0 + - (a2 * (0xffffffff % b0)) / b0; - - return res; -} - -static void __init synchronize_tsc_bp (void) -{ - int i; - unsigned long long t0; - unsigned long long sum, avg; - long long delta; - int buggy = 0; - - printk("checking TSC synchronization across CPUs: "); - - atomic_set(&tsc_start_flag, 1); - wmb(); - - /* - * We loop a few times to get a primed instruction cache, - * then the last pass is more or less synchronized and - * the BP and APs set their cycle counters to zero all at - * once. This reduces the chance of having random offsets - * between the processors, and guarantees that the maximum - * delay between the cycle counters is never bigger than - * the latency of information-passing (cachelines) between - * two CPUs. 
- */ - for (i = 0; i < NR_LOOPS; i++) { - /* - * all APs synchronize but they loop on '== num_cpus' - */ - while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb(); - atomic_set(&tsc_count_stop, 0); - wmb(); - /* - * this lets the APs save their current TSC: - */ - atomic_inc(&tsc_count_start); - - rdtscll(tsc_values[smp_processor_id()]); - /* - * We clear the TSC in the last loop: - */ - if (i == NR_LOOPS-1) - write_tsc(0, 0); - - /* - * Wait for all APs to leave the synchronization point: - */ - while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb(); - atomic_set(&tsc_count_start, 0); - wmb(); - atomic_inc(&tsc_count_stop); - } - - sum = 0; - for (i = 0; i < smp_num_cpus; i++) { - t0 = tsc_values[i]; - sum += t0; - } - avg = div64(sum, smp_num_cpus); - - sum = 0; - for (i = 0; i < smp_num_cpus; i++) { - delta = tsc_values[i] - avg; - if (delta < 0) - delta = -delta; - /* - * We report bigger than 2 microseconds clock differences. - */ - if (delta > 2*ticks_per_usec) { - long realdelta; - if (!buggy) { - buggy = 1; - printk("\n"); - } - realdelta = div64(delta, ticks_per_usec); - if (tsc_values[i] < avg) - realdelta = -realdelta; - - printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! 
FIXED.\n", - i, realdelta); - } - - sum += delta; - } - if (!buggy) - printk("passed.\n"); -} - -static void __init synchronize_tsc_ap (void) -{ - int i; - - /* - * smp_num_cpus is not necessarily known at the time - * this gets called, so we first wait for the BP to - * finish SMP initialization: - */ - while (!atomic_read(&tsc_start_flag)) mb(); - - for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != smp_num_cpus) mb(); - - rdtscll(tsc_values[smp_processor_id()]); - if (i == NR_LOOPS-1) - write_tsc(0, 0); - - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb(); - } -} -#undef NR_LOOPS - -static atomic_t init_deasserted; - -void __init smp_callin(void) -{ - int cpuid, phys_id, i; - - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - while (!atomic_read(&init_deasserted)); - - /* - * (This works even if the APIC is not enabled.) - */ - phys_id = GET_APIC_ID(apic_read(APIC_ID)); - cpuid = smp_processor_id(); - if (test_and_set_bit(cpuid, &cpu_online_map)) { - printk("huh, phys CPU#%d, CPU#%d already present??\n", - phys_id, cpuid); - BUG(); - } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. 
- */ - - for ( i = 0; i < 200; i++ ) - { - if ( test_bit(cpuid, &cpu_callout_map) ) break; - mdelay(10); - } - - if (!test_bit(cpuid, &cpu_callout_map)) { - printk("BUG: CPU%d started up but did not get a callout!\n", - cpuid); - BUG(); - } - - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) - */ - - Dprintk("CALLIN, before setup_local_APIC().\n"); - - setup_local_APIC(); - - __sti(); - -#ifdef CONFIG_MTRR - /* - * Must be done before calibration delay is computed - */ - mtrr_init_secondary_cpu (); -#endif - - Dprintk("Stack at about %p\n",&cpuid); - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - - if (nmi_watchdog == NMI_LOCAL_APIC) - setup_apic_nmi_watchdog(); - - /* - * Allow the master to continue. - */ - set_bit(cpuid, &cpu_callin_map); - - /* - * Synchronize the TSC with the BP - */ - synchronize_tsc_ap(); -} - -static int cpucount; - -/* - * Activate a secondary processor. - */ -void __init start_secondary(void) -{ - unsigned int cpu = cpucount; - /* 6 bytes suitable for passing to LIDT instruction. */ - unsigned char idt_load[6]; - - extern void cpu_init(void); - - set_current(idle_task[cpu]); - - /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. - */ - cpu_init(); - smp_callin(); - - while (!atomic_read(&smp_commenced)) - rep_nop(); - - /* - * At this point, boot CPU has fully initialised the IDT. It is - * now safe to make ourselves a private copy. 
- */ - idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL); - memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8); - *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1; - *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu]; - __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); - - /* - * low-memory mappings have been cleared, flush them from the local TLBs - * too. - */ - local_flush_tlb(); - - startup_cpu_idle_loop(); - - BUG(); -} - -extern struct { - unsigned long esp, ss; -} stack_start; - -/* which physical APIC ID maps to which logical CPU number */ -volatile int physical_apicid_2_cpu[MAX_APICID]; -/* which logical CPU number maps to which physical APIC ID */ -volatile int cpu_2_physical_apicid[NR_CPUS]; - -/* which logical APIC ID maps to which logical CPU number */ -volatile int logical_apicid_2_cpu[MAX_APICID]; -/* which logical CPU number maps to which logical APIC ID */ -volatile int cpu_2_logical_apicid[NR_CPUS]; - -static inline void init_cpu_to_apicid(void) -/* Initialize all maps between cpu number and apicids */ -{ - int apicid, cpu; - - for (apicid = 0; apicid < MAX_APICID; apicid++) { - physical_apicid_2_cpu[apicid] = -1; - logical_apicid_2_cpu[apicid] = -1; - } - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpu_2_physical_apicid[cpu] = -1; - cpu_2_logical_apicid[cpu] = -1; - } -} - -static inline void map_cpu_to_boot_apicid(int cpu, int apicid) -/* - * set up a mapping between cpu and apicid. Uses logical apicids for multiquad, - * else physical apic ids - */ -{ - physical_apicid_2_cpu[apicid] = cpu; - cpu_2_physical_apicid[cpu] = apicid; -} - -static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid) -/* - * undo a mapping between cpu and apicid. 
Uses logical apicids for multiquad, - * else physical apic ids - */ -{ - physical_apicid_2_cpu[apicid] = -1; - cpu_2_physical_apicid[cpu] = -1; -} - -#if APIC_DEBUG -static inline void inquire_remote_apic(int apicid) -{ - int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - char *names[] = { "ID", "VERSION", "SPIV" }; - int timeout, status; - - printk("Inquiring remote APIC #%d...\n", apicid); - - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { - printk("... APIC #%d %s: ", apicid, names[i]); - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk("%08x\n", status); - break; - default: - printk("failed\n"); - } - } -} -#endif - - -static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip) -{ - unsigned long send_status = 0, accept_status = 0; - int maxlvt, timeout, num_starts, j; - - Dprintk("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* - * Send IPI - */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - mdelay(10); - - Dprintk("Deasserting INIT.\n"); - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = 
apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - atomic_set(&init_deasserted, 1); - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Run STARTUP IPI loop. - */ - Dprintk("#startup loops: %d.\n", num_starts); - - maxlvt = get_maxlvt(); - - for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n",j); - - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(300); - - Dprintk("Startup point 1.\n"); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - Dprintk("After Startup.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} - -extern unsigned long cpu_initialized; - -static void __init do_boot_cpu (int apicid) -/* - * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad - * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
- */ -{ - struct task_struct *idle; - unsigned long boot_error = 0; - int timeout, cpu; - unsigned long start_eip, stack; - - cpu = ++cpucount; - - if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL ) - panic("failed 'createdomain' for CPU %d", cpu); - - set_bit(PF_IDLETASK, &idle->flags); - - idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table)); - - map_cpu_to_boot_apicid(cpu, apicid); - - SET_DEFAULT_FAST_TRAP(&idle->thread); - - idle_task[cpu] = idle; - - /* start_eip had better be page-aligned! */ - start_eip = setup_trampoline(); - - /* So we see what's up. */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); - - stack = __pa(__get_free_pages(GFP_KERNEL, 1)); - stack_start.esp = stack + STACK_SIZE - STACK_RESERVED; - - /* Debug build: detect stack overflow by setting up a guard page. */ - memguard_guard_range(__va(stack), PAGE_SIZE); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - - Dprintk("Setting warm reset code and vector.\n"); - - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - Dprintk("1.\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; - Dprintk("2.\n"); - *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; - Dprintk("3.\n"); - - /* - * Be paranoid about clearing APIC errors. - */ - if ( APIC_INTEGRATED(apic_version[apicid]) ) - { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - /* - * Status is now clean - */ - boot_error = 0; - - /* - * Starting actual IPI sequence... - */ - - boot_error = wakeup_secondary_via_INIT(apicid, start_eip); - - if (!boot_error) { - /* - * allow APs to start initializing. 
- */ - Dprintk("Before Callout %d.\n", cpu); - set_bit(cpu, &cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (test_bit(cpu, &cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (test_bit(cpu, &cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - printk("CPU%d has booted.\n", cpu); - } else { - boot_error= 1; - if (*((volatile unsigned long *)phys_to_virt(start_eip)) - == 0xA5A5A5A5) - /* trampoline started but...? */ - printk("Stuck ??\n"); - else - /* trampoline code not run */ - printk("Not responding.\n"); -#if APIC_DEBUG - inquire_remote_apic(apicid); -#endif - } - } - if (boot_error) { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_boot_apicid(cpu, apicid); - clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */ - clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ - clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */ - cpucount--; - } -} - - -/* - * Cycle through the processors sending APIC IPIs to boot each. - */ - -static int boot_cpu_logical_apicid; -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio = NULL; - -void __init smp_boot_cpus(void) -{ - int apicid, bit; - -#ifdef CONFIG_MTRR - /* Must be done before other processors booted */ - mtrr_init_boot_cpu (); -#endif - /* Initialize the logical to physical CPU number mapping */ - init_cpu_to_apicid(); - - /* - * Setup boot CPU information - */ - smp_store_cpu_info(0); /* Final full version of the data */ - printk("CPU%d booted\n", 0); - - /* - * We have the boot CPU online for sure. - */ - set_bit(0, &cpu_online_map); - boot_cpu_logical_apicid = logical_smp_processor_id(); - map_cpu_to_boot_apicid(0, boot_cpu_apicid); - - /* - * If we couldnt find an SMP configuration at boot time, - * get out of here now! 
- */ - if (!smp_found_config) { - printk("SMP motherboard not detected.\n"); - io_apic_irqs = 0; - cpu_online_map = phys_cpu_present_map = 1; - smp_num_cpus = 1; - if (APIC_init_uniprocessor()) - printk("Local APIC not detected." - " Using dummy APIC emulation.\n"); - goto smp_done; - } - - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - */ - if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - phys_cpu_present_map |= (1 << hard_smp_processor_id()); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && - !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) { - printk("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - io_apic_irqs = 0; - cpu_online_map = phys_cpu_present_map = 1; - smp_num_cpus = 1; - goto smp_done; - } - - verify_local_APIC(); - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - smp_found_config = 0; - printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - io_apic_irqs = 0; - cpu_online_map = phys_cpu_present_map = 1; - smp_num_cpus = 1; - goto smp_done; - } - - connect_bsp_APIC(); - setup_local_APIC(); - - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) - BUG(); - - /* - * Scan the CPU present map and fire up the other CPUs via do_boot_cpu - * - * In clustered apic mode, phys_cpu_present_map is a constructed thus: - * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the - * clustered apic ID. 
- */ - Dprintk("CPU present map: %lx\n", phys_cpu_present_map); - - for (bit = 0; bit < NR_CPUS; bit++) { - apicid = cpu_present_to_apicid(bit); - /* - * Don't even attempt to start the boot CPU! - */ - if (apicid == boot_cpu_apicid) - continue; - - if (!(phys_cpu_present_map & (1 << bit))) - continue; - if ((max_cpus >= 0) && (max_cpus <= cpucount+1)) - continue; - - do_boot_cpu(apicid); - - /* - * Make sure we unmap all failed CPUs - */ - if ((boot_apicid_to_cpu(apicid) == -1) && - (phys_cpu_present_map & (1 << bit))) - printk("CPU #%d not responding - cannot use it.\n", - apicid); - } - - /* - * Cleanup possible dangling ends... - */ - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - CMOS_WRITE(0, 0xf); - - *((volatile long *) phys_to_virt(0x467)) = 0; - - if (!cpucount) { - printk("Error: only one processor found.\n"); - } else { - printk("Total of %d processors activated.\n", cpucount+1); - } - smp_num_cpus = cpucount + 1; - - if (smp_b_stepping) - printk("WARNING: SMP operation may" - " be unreliable with B stepping processors.\n"); - Dprintk("Boot done.\n"); - - /* - * Here we can be sure that there is an IO-APIC in the system. Let's - * go and set it up: - */ - if ( nr_ioapics ) setup_IO_APIC(); - - /* Set up all local APIC timers in the system. */ - setup_APIC_clocks(); - - /* Synchronize the TSC with the AP(s). 
*/ - if ( cpucount ) synchronize_tsc_bp(); - - smp_done: - ; -} - -#endif /* CONFIG_SMP */ diff --git a/xen/arch/i386/time.c b/xen/arch/i386/time.c deleted file mode 100644 index 9cd6da1955..0000000000 --- a/xen/arch/i386/time.c +++ /dev/null @@ -1,386 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge - * (C) 2002-2003 University of Cambridge - **************************************************************************** - * - * File: i386/time.c - * Author: Rolf Neugebar & Keir Fraser - * - * Environment: Xen Hypervisor - * Description: modified version of Linux' time.c - * implements system and wall clock time. - * based on freebsd's implementation. - */ - -/* - * linux/arch/i386/kernel/time.c - * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -extern rwlock_t xtime_lock; -extern unsigned long wall_jiffies; - -/* GLOBAL */ -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ -unsigned long ticks_per_usec; /* TSC ticks per microsecond. */ -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; -int timer_ack = 0; -int do_timer_lists_from_pit = 0; - -/* PRIVATE */ -static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? 
*/ -static u64 cpu_freq; /* CPU frequency (Hz) */ -static u32 st_scale_f; /* Cycles -> ns, fractional part */ -static u32 st_scale_i; /* Cycles -> ns, integer part */ -static u32 tsc_irq; /* CPU0's TSC at last 'time update' */ -static s_time_t stime_irq; /* System time at last 'time update' */ - -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - u64 full_tsc; - - write_lock(&xtime_lock); - -#ifdef CONFIG_X86_IO_APIC - if ( timer_ack ) - { - extern spinlock_t i8259A_lock; - spin_lock(&i8259A_lock); - outb(0x0c, 0x20); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(0x20); - spin_unlock(&i8259A_lock); - } -#endif - - /* - * Updates TSC timestamp (used to interpolate passage of time between - * interrupts). - */ - rdtscll(full_tsc); - tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); - - /* Updates xtime (wallclock time). */ - do_timer(regs); - - /* Updates system time (nanoseconds since boot). */ - stime_irq += MILLISECS(1000/HZ); - - write_unlock(&xtime_lock); - - /* Rough hack to allow accurate timers to sort-of-work with no APIC. */ - if ( do_timer_lists_from_pit ) - __cpu_raise_softirq(smp_processor_id(), AC_TIMER_SOFTIRQ); -} - -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, - "timer", NULL, NULL}; - -/* ------ Calibrate the TSC ------- - * Return processor ticks per second / CALIBRATE_FRAC. 
- */ - -#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */ -#define CALIBRATE_FRAC 20 /* calibrate over 50ms */ -#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC) - -static unsigned long __init calibrate_tsc(void) -{ - unsigned long startlow, starthigh, endlow, endhigh, count; - - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Now let's take care of CTC channel 2 - * - * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on - * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB) - * to begin countdown. - */ - outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ - outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ - outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ - - rdtsc(startlow, starthigh); - for ( count = 0; (inb(0x61) & 0x20) == 0; count++ ) - continue; - rdtsc(endlow, endhigh); - - /* Error if the CTC doesn't behave itself. */ - if ( count == 0 ) - return 0; - - /* [endhigh:endlow] = [endhigh:endlow] - [starthigh:startlow] */ - __asm__( "subl %2,%0 ; sbbl %3,%1" - : "=a" (endlow), "=d" (endhigh) - : "g" (startlow), "g" (starthigh), "0" (endlow), "1" (endhigh) ); - - /* If quotient doesn't fit in 32 bits then we return error (zero). */ - return endhigh ? 0 : endlow; -} - - -/*************************************************************************** - * CMOS Timer functions - ***************************************************************************/ - -/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. - * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 - * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. - * - * [For the Julian calendar (which was used in Russia before 1917, - * Britain & colonies before 1752, anywhere else before 1582, - * and is still in use by some communities) leave out the - * -year/100+year/400 terms, and add 10.] 
- * - * This algorithm was first published by Gauss (I think). - * - * WARNING: this function will overflow on 2106-02-07 06:28:16 on - * machines were long is 32-bit! (However, as time_t is signed, we - * will already get problems at other places on 2038-01-19 03:14:08) - */ -static inline unsigned long -mktime (unsigned int year, unsigned int mon, - unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec) -{ - /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */ - if ( 0 >= (int) (mon -= 2) ) - { - mon += 12; - year -= 1; - } - - return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+ - year*365 - 719499 - )*24 + hour /* now have hours */ - )*60 + min /* now have minutes */ - )*60 + sec; /* finally seconds */ -} - -static unsigned long __get_cmos_time(void) -{ - unsigned int year, mon, day, hour, min, sec; - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - - if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) - { - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - } - - if ( (year += 1900) < 1970 ) - year += 100; - - return mktime(year, mon, day, hour, min, sec); -} - -static unsigned long get_cmos_time(void) -{ - unsigned long res, flags; - int i; - - spin_lock_irqsave(&rtc_lock, flags); - - /* read RTC exactly on falling edge of update flag */ - for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... 
*/ - if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) ) - break; - for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */ - if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) ) - break; - - res = __get_cmos_time(); - - spin_unlock_irqrestore(&rtc_lock, flags); - return res; -} - -/*************************************************************************** - * System Time - ***************************************************************************/ - -static inline u64 get_time_delta(void) -{ - s32 delta_tsc; - u32 low; - u64 delta, tsc; - - rdtscll(tsc); - low = (u32)(tsc >> rdtsc_bitshift); - delta_tsc = (s32)(low - tsc_irq); - if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; - delta = ((u64)delta_tsc * st_scale_f); - delta >>= 32; - delta += ((u64)delta_tsc * st_scale_i); - - return delta; -} - -s_time_t get_s_time(void) -{ - s_time_t now; - unsigned long flags; - - read_lock_irqsave(&xtime_lock, flags); - - now = stime_irq + get_time_delta(); - - /* Ensure that the returned system time is monotonically increasing. */ - { - static s_time_t prev_now = 0; - if ( unlikely(now < prev_now) ) - now = prev_now; - prev_now = now; - } - - read_unlock_irqrestore(&xtime_lock, flags); - - return now; -} - - -void update_dom_time(shared_info_t *si) -{ - unsigned long flags; - - read_lock_irqsave(&xtime_lock, flags); - - si->time_version1++; - wmb(); - - si->cpu_freq = cpu_freq; - si->tsc_timestamp.tsc_bitshift = rdtsc_bitshift; - si->tsc_timestamp.tsc_bits = tsc_irq; - si->system_time = stime_irq; - si->wc_sec = xtime.tv_sec; - si->wc_usec = xtime.tv_usec; - si->wc_usec += (jiffies - wall_jiffies) * (1000000 / HZ); - while ( si->wc_usec >= 1000000 ) - { - si->wc_usec -= 1000000; - si->wc_sec++; - } - - wmb(); - si->time_version2++; - - read_unlock_irqrestore(&xtime_lock, flags); -} - - -/* Set clock to after 00:00:00 UTC, 1 January, 1970. 
*/ -void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) -{ - s64 delta; - long _usecs = (long)usecs; - - write_lock_irq(&xtime_lock); - - delta = (s64)(stime_irq - system_time_base); - - _usecs += (long)(delta/1000); - _usecs -= (jiffies - wall_jiffies) * (1000000 / HZ); - - while ( _usecs < 0 ) - { - _usecs += 1000000; - secs--; - } - - xtime.tv_sec = secs; - xtime.tv_usec = _usecs; - - write_unlock_irq(&xtime_lock); - - update_dom_time(current->shared_info); -} - - -/* Late init function (after all CPUs are booted). */ -int __init init_xen_time() -{ - u64 scale; - u64 full_tsc; - unsigned int cpu_ghz; - - cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL); - for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 ) - continue; - - scale = 1000000000LL << (32 + rdtsc_bitshift); - scale /= cpu_freq; - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; - - /* System time ticks from zero. */ - rdtscll(full_tsc); - stime_irq = (s_time_t)0; - tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); - - /* Wallclock time starts as the initial RTC time. */ - xtime.tv_sec = get_cmos_time(); - - printk("Time init:\n"); - printk(".... System Time: %lldns\n", - NOW()); - printk(".... cpu_freq: %08X:%08X\n", - (u32)(cpu_freq>>32), (u32)cpu_freq); - printk(".... scale: %08X:%08X\n", - (u32)(scale>>32), (u32)scale); - printk(".... Wall Clock: %lds %ldus\n", - xtime.tv_sec, xtime.tv_usec); - - return 0; -} - - -/* Early init function. 
*/ -void __init time_init(void) -{ - unsigned long ticks_per_frac = calibrate_tsc(); - - if ( !ticks_per_frac ) - panic("Error calibrating TSC\n"); - - ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC); - cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC); - - cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC; - - printk("Detected %lu.%03lu MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - setup_irq(0, &irq0); -} diff --git a/xen/arch/i386/trampoline.S b/xen/arch/i386/trampoline.S deleted file mode 100644 index d9a1cb6888..0000000000 --- a/xen/arch/i386/trampoline.S +++ /dev/null @@ -1,59 +0,0 @@ -/* - * - * Trampoline.S Derived from Setup.S by Linus Torvalds - * - * 4 Jan 1997 Michael Chastain: changed to gnu as. - * - * Entry: CS:IP point to the start of our code, we are - * in real mode with no stack, but the rest of the - * trampoline page to make our stack and everything else - * is a mystery. - * - * On entry to trampoline_data, the processor is in real mode - * with 16-bit addressing and 16-bit data. CS has some value - * and IP is zero. Thus, data addresses need to be absolute - * (no relocation) and are taken with regard to r_base. - */ - -#include -#include -#include - -#ifdef CONFIG_SMP - -.data - -.code16 - -ENTRY(trampoline_data) -r_base = . 
- mov %cs, %ax # Code and data in the same place - mov %ax, %ds - - movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline - cli # We should be safe anyway - - movl $0xA5A5A5A5, trampoline_data - r_base - - lidt idt_48 - r_base # load idt with 0, 0 - lgdt gdt_48 - r_base # load gdt with whatever is appropriate - - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode - jmp flush_instr -flush_instr: - ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET - -idt_48: - .word 0 # idt limit = 0 - .word 0, 0 # idt base = 0L - -gdt_48: - .word (LAST_RESERVED_GDT_ENTRY*8)+7 - .long gdt_table-__PAGE_OFFSET - -.globl SYMBOL_NAME(trampoline_end) -SYMBOL_NAME_LABEL(trampoline_end) - -#endif /* CONFIG_SMP */ diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c deleted file mode 100644 index 9b3b11851a..0000000000 --- a/xen/arch/i386/traps.c +++ /dev/null @@ -1,910 +0,0 @@ -/****************************************************************************** - * arch/i386/traps.c - * - * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * xen/arch/i386/traps.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define GTBF_TRAP 1 -#define GTBF_TRAP_NOCODE 2 -#define GTBF_TRAP_CR2 4 -struct guest_trap_bounce { - unsigned long error_code; /* 0 */ - unsigned long cr2; /* 4 */ - unsigned short flags; /* 8 */ - unsigned short cs; /* 10 */ - unsigned long eip; /* 12 */ -} guest_trap_bounce[NR_CPUS] = { { 0 } }; - -#define DOUBLEFAULT_STACK_SIZE 1024 -static struct tss_struct doublefault_tss; -static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; - -asmlinkage int hypervisor_call(void); -asmlinkage void lcall7(void); -asmlinkage void lcall27(void); - -/* Master table, and the one used by CPU0. */ -struct desc_struct idt_table[256] = { {0, 0}, }; -/* All other CPUs have their own copy. 
*/ -struct desc_struct *idt_tables[NR_CPUS] = { 0 }; - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void nmi(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void coprocessor_error(void); -asmlinkage void simd_coprocessor_error(void); -asmlinkage void alignment_check(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void machine_check(void); - -int kstack_depth_to_print = 8*20; - -static inline int kernel_text_address(unsigned long addr) -{ - if (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext) - return 1; - return 0; - -} - -void show_stack(unsigned long *esp) -{ - unsigned long *stack, addr; - int i; - - printk("Stack trace from ESP=%p:\n", esp); - - stack = esp; - for ( i = 0; i < kstack_depth_to_print; i++ ) - { - if ( ((long)stack & (STACK_SIZE-1)) == 0 ) - break; - if ( i && ((i % 8) == 0) ) - printk("\n "); - if ( kernel_text_address(*stack) ) - printk("[%08lx] ", *stack++); - else - printk("%08lx ", *stack++); - } - printk("\n"); - - printk("Call Trace from ESP=%p: ", esp); - stack = esp; - i = 0; - while (((long) stack & (STACK_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } - printk("\n"); -} - -void show_registers(struct pt_regs *regs) -{ - unsigned long esp; - unsigned short ss; - - esp = (unsigned long) (®s->esp); - ss = __HYPERVISOR_DS; - if ( regs->xcs & 3 ) - { - esp = regs->esp; - ss = regs->xss & 0xffff; - } - - printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n", - 
smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, - regs->xfs & 0xffff, regs->xgs & 0xffff, ss); - - show_stack(®s->esp); -} - - -spinlock_t die_lock = SPIN_LOCK_UNLOCKED; - -void die(const char * str, struct pt_regs * regs, long err) -{ - unsigned long flags; - spin_lock_irqsave(&die_lock, flags); - printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff); - show_registers(regs); - spin_unlock_irqrestore(&die_lock, flags); - panic("HYPERVISOR DEATH!!\n"); -} - - -static inline void do_trap(int trapnr, char *str, - struct pt_regs *regs, - long error_code, int use_error_code) -{ - struct task_struct *p = current; - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - trap_info_t *ti; - unsigned long fixup; - - if (!(regs->xcs & 3)) - goto fault_in_hypervisor; - - ti = current->thread.traps + trapnr; - gtb->flags = use_error_code ? 
GTBF_TRAP : GTBF_TRAP_NOCODE; - gtb->error_code = error_code; - gtb->cs = ti->cs; - gtb->eip = ti->address; - if ( TI_GET_IF(ti) ) - p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; - return; - - fault_in_hypervisor: - - if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) - { - DPRINTK("Trap %d: %08lx -> %08lx\n", trapnr, regs->eip, fixup); - regs->eip = fixup; - regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; - return; - } - - show_registers(regs); - panic("CPU%d FATAL TRAP: vector = %d (%s)\n" - "[error_code=%08x]\n", - smp_processor_id(), trapnr, str, error_code); -} - -#define DO_ERROR_NOCODE(trapnr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ -do_trap(trapnr, str, regs, error_code, 0); \ -} - -#define DO_ERROR(trapnr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ -{ \ -do_trap(trapnr, str, regs, error_code, 1); \ -} - -DO_ERROR_NOCODE( 0, "divide error", divide_error) - DO_ERROR_NOCODE( 4, "overflow", overflow) - DO_ERROR_NOCODE( 5, "bounds", bounds) - DO_ERROR_NOCODE( 6, "invalid operand", invalid_op) - DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun) - DO_ERROR(10, "invalid TSS", invalid_TSS) - DO_ERROR(11, "segment not present", segment_not_present) - DO_ERROR(12, "stack segment", stack_segment) -/* Vector 15 reserved by Intel */ - DO_ERROR_NOCODE(16, "fpu error", coprocessor_error) - DO_ERROR(17, "alignment check", alignment_check) - DO_ERROR_NOCODE(18, "machine check", machine_check) - DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error) - - asmlinkage void do_int3(struct pt_regs *regs, long error_code) -{ - struct task_struct *p = current; - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - trap_info_t *ti; - -#ifdef XEN_DEBUGGER - if ( pdb_initialized && pdb_handle_exception(3, regs) == 0 ) - return; -#endif - - if ( (regs->xcs & 3) != 3 ) - { - if ( unlikely((regs->xcs & 3) == 0) ) - { 
- show_registers(regs); - panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n" - "[error_code=%08x]\n", - smp_processor_id(), error_code); - } - } - - ti = current->thread.traps + 3; - gtb->flags = GTBF_TRAP_NOCODE; - gtb->error_code = error_code; - gtb->cs = ti->cs; - gtb->eip = ti->address; - if ( TI_GET_IF(ti) ) - p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; -} - -asmlinkage void do_double_fault(void) -{ - extern spinlock_t console_lock; - struct tss_struct *tss = &doublefault_tss; - unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1; - - /* Disable the NMI watchdog. It's useless now. */ - watchdog_on = 0; - - /* Find information saved during fault and dump it to the console. */ - tss = &init_tss[cpu]; - printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n", - cpu, tss->cs, tss->eip, tss->eflags); - printk("CR3: %08lx\n", tss->__cr3); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - tss->eax, tss->ebx, tss->ecx, tss->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - tss->esi, tss->edi, tss->ebp, tss->esp); - printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - tss->ds, tss->es, tss->fs, tss->gs, tss->ss); - printk("************************************\n"); - printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu); - printk("System needs manual reset.\n"); - printk("************************************\n"); - - /* Lock up the console to prevent spurious output from other CPUs. */ - spin_lock(&console_lock); - - /* Wait for manual reset. 
*/ - for ( ; ; ) ; -} - -asmlinkage void do_page_fault(struct pt_regs *regs, long error_code) -{ - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - trap_info_t *ti; - unsigned long off, addr, fixup; - struct task_struct *p = current; - extern int map_ldt_shadow_page(unsigned int); - - __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : ); - - perfc_incrc(page_faults); - - if ( unlikely(addr >= LDT_VIRT_START) && - (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) ) - { - /* - * Copy a mapping from the guest's LDT, if it is valid. Otherwise we - * send the fault up to the guest OS to be handled. - */ - off = addr - LDT_VIRT_START; - addr = p->mm.ldt_base + off; - if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) ) - return; /* successfully copied the mapping */ - } - - if ( unlikely(p->mm.shadow_mode) && - (addr < PAGE_OFFSET) && shadow_fault(addr, error_code) ) - return; /* Returns TRUE if fault was handled. */ - - if ( unlikely(!(regs->xcs & 3)) ) - goto fault_in_hypervisor; - - ti = p->thread.traps + 14; - gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */ - gtb->cr2 = addr; - gtb->error_code = error_code; - gtb->cs = ti->cs; - gtb->eip = ti->address; - if ( TI_GET_IF(ti) ) - p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; - return; - - fault_in_hypervisor: - - if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) - { - perfc_incrc(copy_user_faults); - if ( !p->mm.shadow_mode ) - DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup); - regs->eip = fixup; - regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; - return; - } - - if ( addr >= PAGE_OFFSET ) - { - unsigned long page; - page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]); - printk("*pde = %08lx\n", page); - if ( page & _PAGE_PRESENT ) - { - page &= PAGE_MASK; - page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT]; - printk(" *pte = %08lx\n", page); - } -#ifdef MEMORY_GUARD - if ( !(error_code & 1) ) - 
printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n"); -#endif - } - -#ifdef XEN_DEBUGGER - if ( pdb_page_fault_possible ) - { - pdb_page_fault = 1; - /* make eax & edx valid to complete the instruction */ - regs->eax = (long)&pdb_page_fault_scratch; - regs->edx = (long)&pdb_page_fault_scratch; - return; - } -#endif - - show_registers(regs); - panic("CPU%d FATAL PAGE FAULT\n" - "[error_code=%08x]\n" - "Faulting linear address might be %08lx\n", - smp_processor_id(), error_code, addr); -} - -asmlinkage void do_general_protection(struct pt_regs *regs, long error_code) -{ - struct task_struct *p = current; - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - trap_info_t *ti; - unsigned long fixup; - - /* Badness if error in ring 0, or result of an interrupt. */ - if ( !(regs->xcs & 3) || (error_code & 1) ) - goto gp_in_kernel; - - /* - * Cunning trick to allow arbitrary "INT n" handling. - * - * We set DPL == 0 on all vectors in the IDT. This prevents any INT - * instruction from trapping to the appropriate vector, when that might not - * be expected by Xen or the guest OS. For example, that entry might be for - * a fault handler (unlike traps, faults don't increment EIP), or might - * expect an error code on the stack (which a software trap never - * provides), or might be a hardware interrupt handler that doesn't like - * being called spuriously. - * - * Instead, a GPF occurs with the faulting IDT vector in the error code. - * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is - * clear to indicate that it's a software fault, not hardware. - * - * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is - * okay because they can only be triggered by an explicit DPL-checked - * instruction. The DPL specified by the guest OS for these vectors is NOT - * CHECKED!! - */ - if ( (error_code & 3) == 2 ) - { - /* This fault must be due to instruction. 
*/ - ti = current->thread.traps + (error_code>>3); - if ( TI_GET_DPL(ti) >= (regs->xcs & 3) ) - { -#ifdef XEN_DEBUGGER - if ( pdb_initialized && (pdb_ctx.system_call != 0) ) - { - unsigned long cr3; - __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); - if ( cr3 == pdb_ctx.ptbr ) - pdb_linux_syscall_enter_bkpt(regs, error_code, ti); - } -#endif - - gtb->flags = GTBF_TRAP_NOCODE; - regs->eip += 2; - goto finish_propagation; - } - } - - /* Pass on GPF as is. */ - ti = current->thread.traps + 13; - gtb->flags = GTBF_TRAP; - gtb->error_code = error_code; - finish_propagation: - gtb->cs = ti->cs; - gtb->eip = ti->address; - if ( TI_GET_IF(ti) ) - p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; - return; - - gp_in_kernel: - - if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) - { - DPRINTK("GPF (%04lx): %08lx -> %08lx\n", error_code, regs->eip, fixup); - regs->eip = fixup; - regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; - return; - } - - die("general protection fault", regs, error_code); -} - -asmlinkage void mem_parity_error(unsigned char reason, struct pt_regs * regs) -{ - printk("NMI received. Dazed and confused, but trying to continue\n"); - printk("You probably have a hardware problem with your RAM chips\n"); - - /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); - - show_registers(regs); - panic("PARITY ERROR"); -} - -asmlinkage void io_check_error(unsigned char reason, struct pt_regs * regs) -{ - printk("NMI: IOCK error (debug interrupt?)\n"); - - reason = (reason & 0xf) | 8; - outb(reason, 0x61); - - show_registers(regs); - panic("IOCK ERROR"); -} - -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -{ - printk("Uhhuh. 
NMI received for unknown reason %02x.\n", reason); - printk("Dazed and confused, but trying to continue\n"); - printk("Do you have a strange power saving mode enabled?\n"); -} - -asmlinkage void do_nmi(struct pt_regs * regs, unsigned long reason) -{ - ++nmi_count(smp_processor_id()); - -#if CONFIG_X86_LOCAL_APIC - if ( nmi_watchdog ) - nmi_watchdog_tick(regs); - else -#endif - unknown_nmi_error((unsigned char)(reason&0xff), regs); -} - -asmlinkage void math_state_restore(struct pt_regs *regs, long error_code) -{ - /* Prevent recursion. */ - clts(); - - if ( !test_bit(PF_USEDFPU, ¤t->flags) ) - { - if ( test_bit(PF_DONEFPUINIT, ¤t->flags) ) - restore_fpu(current); - else - init_fpu(); - set_bit(PF_USEDFPU, ¤t->flags); /* so we fnsave on switch_to() */ - } - - if ( test_and_clear_bit(PF_GUEST_STTS, ¤t->flags) ) - { - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - gtb->flags = GTBF_TRAP_NOCODE; - gtb->cs = current->thread.traps[7].cs; - gtb->eip = current->thread.traps[7].address; - } -} - -#ifdef XEN_DEBUGGER -asmlinkage void do_pdb_debug(struct pt_regs *regs, long error_code) -{ - unsigned int condition; - struct task_struct *tsk = current; - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); - if ( (condition & (1 << 14)) != (1 << 14) ) - printk("\nwarning: debug trap w/o BS bit [0x%x]\n\n", condition); - __asm__("movl %0,%%db6" : : "r" (0)); - - if ( pdb_handle_exception(1, regs) != 0 ) - { - tsk->thread.debugreg[6] = condition; - - gtb->flags = GTBF_TRAP_NOCODE; - gtb->cs = tsk->thread.traps[1].cs; - gtb->eip = tsk->thread.traps[1].address; - } -} -#endif - -asmlinkage void do_debug(struct pt_regs *regs, long error_code) -{ - unsigned int condition; - struct task_struct *tsk = current; - struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); - -#ifdef XEN_DEBUGGER - if ( pdb_initialized ) - return do_pdb_debug(regs, error_code); -#endif - - 
__asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); - - /* Mask out spurious debug traps due to lazy DR7 setting */ - if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) && - (tsk->thread.debugreg[7] == 0) ) - { - __asm__("movl %0,%%db7" : : "r" (0)); - return; - } - - if ( (regs->xcs & 3) == 0 ) - { - /* Clear TF just for absolute sanity. */ - regs->eflags &= ~EF_TF; - /* - * Basically, we ignore watchpoints when they trigger in - * the hypervisor. This may happen when a buffer is passed - * to us which previously had a watchpoint set on it. - * No need to bump EIP; the only faulting trap is an - * instruction breakpoint, which can't happen to us. - */ - return; - } - - /* Save debug status register where guest OS can peek at it */ - tsk->thread.debugreg[6] = condition; - - gtb->flags = GTBF_TRAP_NOCODE; - gtb->cs = tsk->thread.traps[1].cs; - gtb->eip = tsk->thread.traps[1].address; -} - - -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, - long error_code) -{ /* nothing */ } - - -#define _set_gate(gate_addr,type,dpl,addr) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \ -} while (0) - -void set_intr_gate(unsigned int n, void *addr) -{ - _set_gate(idt_table+n,14,0,addr); -} - -static void __init set_system_gate(unsigned int n, void *addr) -{ - _set_gate(idt_table+n,14,3,addr); -} - -static void set_task_gate(unsigned int n, unsigned int sel) -{ - idt_table[n].a = sel << 16; - idt_table[n].b = 0x8500; -} - -#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\ - *((gate_addr)+1) = ((base) & 0xff000000) | \ - (((base) & 0x00ff0000)>>16) | \ - ((limit) & 0xf0000) | \ - ((dpl)<<13) | \ - (0x00408000) | \ - ((type)<<8); \ - *(gate_addr) = 
(((base) & 0x0000ffff)<<16) | \ - ((limit) & 0x0ffff); } - -#define _set_tssldt_desc(n,addr,limit,type) \ -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ - "movw %%ax,2(%2)\n\t" \ - "rorl $16,%%eax\n\t" \ - "movb %%al,4(%2)\n\t" \ - "movb %4,5(%2)\n\t" \ - "movb $0,6(%2)\n\t" \ - "movb %%ah,7(%2)\n\t" \ - "rorl $16,%%eax" \ - : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) - -void set_tss_desc(unsigned int n, void *addr) -{ - _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 8299, 0x89); -} - -void __init trap_init(void) -{ - /* - * Make a separate task for double faults. This will get us debug output if - * we blow the kernel stack. - */ - struct tss_struct *tss = &doublefault_tss; - memset(tss, 0, sizeof(*tss)); - tss->ds = __HYPERVISOR_DS; - tss->es = __HYPERVISOR_DS; - tss->ss = __HYPERVISOR_DS; - tss->esp = (unsigned long) - &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; - tss->__cr3 = __pa(idle_pg_table); - tss->cs = __HYPERVISOR_CS; - tss->eip = (unsigned long)do_double_fault; - tss->eflags = 2; - tss->bitmap = INVALID_IO_BITMAP_OFFSET; - _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, - (int)tss, 235, 0x89); - - /* - * Note that interrupt gates are always used, rather than trap gates. We - * must have interrupts disabled until DS/ES/FS/GS are saved because the - * first activation must have the "bad" value(s) for these registers and - * we may lose them if another activation is installed before they are - * saved. The page-fault handler also needs interrupts disabled until %cr2 - * has been read and saved on the stack. 
- */ - set_intr_gate(0,÷_error); - set_intr_gate(1,&debug); - set_intr_gate(2,&nmi); - set_system_gate(3,&int3); /* usable from all privilege levels */ - set_system_gate(4,&overflow); /* usable from all privilege levels */ - set_intr_gate(5,&bounds); - set_intr_gate(6,&invalid_op); - set_intr_gate(7,&device_not_available); - set_task_gate(8,__DOUBLEFAULT_TSS_ENTRY<<3); - set_intr_gate(9,&coprocessor_segment_overrun); - set_intr_gate(10,&invalid_TSS); - set_intr_gate(11,&segment_not_present); - set_intr_gate(12,&stack_segment); - set_intr_gate(13,&general_protection); - set_intr_gate(14,&page_fault); - set_intr_gate(15,&spurious_interrupt_bug); - set_intr_gate(16,&coprocessor_error); - set_intr_gate(17,&alignment_check); - set_intr_gate(18,&machine_check); - set_intr_gate(19,&simd_coprocessor_error); - - /* Only ring 1 can access monitor services. */ - _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,14,1,&hypervisor_call); - - /* CPU0 uses the master IDT. */ - idt_tables[0] = idt_table; - - /* - * Should be a barrier for any external CPU state. 
- */ - { - extern void cpu_init(void); - cpu_init(); - } -} - - -long do_set_trap_table(trap_info_t *traps) -{ - trap_info_t cur; - trap_info_t *dst = current->thread.traps; - - for ( ; ; ) - { - if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT; - - if ( cur.address == 0 ) break; - - if ( !VALID_CODESEL(cur.cs) ) return -EPERM; - - memcpy(dst+cur.vector, &cur, sizeof(cur)); - traps++; - } - - return 0; -} - - -long do_set_callbacks(unsigned long event_selector, - unsigned long event_address, - unsigned long failsafe_selector, - unsigned long failsafe_address) -{ - struct task_struct *p = current; - - if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) ) - return -EPERM; - - p->event_selector = event_selector; - p->event_address = event_address; - p->failsafe_selector = failsafe_selector; - p->failsafe_address = failsafe_address; - - return 0; -} - - -long set_fast_trap(struct task_struct *p, int idx) -{ - trap_info_t *ti; - - /* Index 0 is special: it disables fast traps. */ - if ( idx == 0 ) - { - if ( p == current ) - CLEAR_FAST_TRAP(&p->thread); - SET_DEFAULT_FAST_TRAP(&p->thread); - return 0; - } - - /* - * We only fast-trap vectors 0x20-0x2f, and vector 0x80. - * The former range is used by Windows and MS-DOS. - * Vector 0x80 is used by Linux and the BSD variants. - */ - if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) ) - return -1; - - ti = p->thread.traps + idx; - - /* - * We can't virtualise interrupt gates, as there's no way to get - * the CPU to automatically clear the events_mask variable. 
- */ - if ( TI_GET_IF(ti) ) - return -1; - - if ( p == current ) - CLEAR_FAST_TRAP(&p->thread); - - p->thread.fast_trap_idx = idx; - p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff); - p->thread.fast_trap_desc.b = - (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13; - - if ( p == current ) - SET_FAST_TRAP(&p->thread); - - return 0; -} - - -long do_set_fast_trap(int idx) -{ - return set_fast_trap(current, idx); -} - - -long do_fpu_taskswitch(void) -{ - set_bit(PF_GUEST_STTS, ¤t->flags); - stts(); - return 0; -} - - -long set_debugreg(struct task_struct *p, int reg, unsigned long value) -{ - int i; - - switch ( reg ) - { - case 0: - if ( value > (PAGE_OFFSET-4) ) return -EPERM; - if ( p == current ) - __asm__ ( "movl %0, %%db0" : : "r" (value) ); - break; - case 1: - if ( value > (PAGE_OFFSET-4) ) return -EPERM; - if ( p == current ) - __asm__ ( "movl %0, %%db1" : : "r" (value) ); - break; - case 2: - if ( value > (PAGE_OFFSET-4) ) return -EPERM; - if ( p == current ) - __asm__ ( "movl %0, %%db2" : : "r" (value) ); - break; - case 3: - if ( value > (PAGE_OFFSET-4) ) return -EPERM; - if ( p == current ) - __asm__ ( "movl %0, %%db3" : : "r" (value) ); - break; - case 6: - /* - * DR6: Bits 4-11,16-31 reserved (set to 1). - * Bit 12 reserved (set to 0). - */ - value &= 0xffffefff; /* reserved bits => 0 */ - value |= 0xffff0ff0; /* reserved bits => 1 */ - if ( p == current ) - __asm__ ( "movl %0, %%db6" : : "r" (value) ); - break; - case 7: - /* - * DR7: Bit 10 reserved (set to 1). - * Bits 11-12,14-15 reserved (set to 0). - * Privileged bits: - * GD (bit 13): must be 0. - * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10. - * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10. - */ - /* DR7 == 0 => debugging disabled for this domain. 
*/ - if ( value != 0 ) - { - value &= 0xffff27ff; /* reserved bits => 0 */ - value |= 0x00000400; /* reserved bits => 1 */ - if ( (value & (1<<13)) != 0 ) return -EPERM; - for ( i = 0; i < 16; i += 2 ) - if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM; - } - if ( p == current ) - __asm__ ( "movl %0, %%db7" : : "r" (value) ); - break; - default: - return -EINVAL; - } - - p->thread.debugreg[reg] = value; - return 0; -} - -long do_set_debugreg(int reg, unsigned long value) -{ - return set_debugreg(current, reg, value); -} - -unsigned long do_get_debugreg(int reg) -{ - if ( (reg < 0) || (reg > 7) ) return -EINVAL; - return current->thread.debugreg[reg]; -} diff --git a/xen/arch/i386/usercopy.c b/xen/arch/i386/usercopy.c deleted file mode 100644 index dc2d34cb90..0000000000 --- a/xen/arch/i386/usercopy.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * User address space access functions. - * The non inlined parts of asm-i386/uaccess.h are here. - * - * Copyright 1997 Andi Kleen - * Copyright 1997 Linus Torvalds - */ -#include -#include -//#include - -#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS - -unsigned long -__generic_copy_to_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - { - if(n<512) - __copy_user(to,from,n); - else - mmx_copy_user(to,from,n); - } - return n; -} - -unsigned long -__generic_copy_from_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - { - if(n<512) - __copy_user_zeroing(to,from,n); - else - mmx_copy_user_zeroing(to, from, n); - } - else - memset(to, 0, n); - return n; -} - -#else - -unsigned long -__generic_copy_to_user(void *to, const void *from, unsigned long n) -{ - prefetch(from); - if (access_ok(VERIFY_WRITE, to, n)) - __copy_user(to,from,n); - return n; -} - -unsigned long -__generic_copy_from_user(void *to, const void *from, unsigned long n) -{ - prefetchw(to); - if (access_ok(VERIFY_READ, from, n)) - __copy_user_zeroing(to,from,n); - else - memset(to, 0, n); - return 
n; -} - -#endif - -/* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - ".previous" \ - : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} - -long -strncpy_from_user(char *dst, const char *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} - - -/* - * Zero Userspace - */ - -#define __do_clear_user(addr,size) \ -do { \ - int __d0; \ - __asm__ __volatile__( \ - "0: rep; stosl\n" \ - " movl %2,%0\n" \ - "1: rep; stosb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%2,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0) \ - : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ -} while (0) - -unsigned long -clear_user(void *to, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - __do_clear_user(to, n); - return n; -} - -unsigned long -__clear_user(void *to, unsigned long n) -{ - __do_clear_user(to, n); - return n; -} - -/* - * Return the size of a string (including the ending 0) - * - * Return 0 on exception, a value greater than N if too long - */ - -long strnlen_user(const char *s, long n) -{ - unsigned long mask = 
-__addr_ok(s); - unsigned long res, tmp; - - __asm__ __volatile__( - " testl %0, %0\n" - " jz 3f\n" - " andl %0,%%ecx\n" - "0: repne; scasb\n" - " setne %%al\n" - " subl %%ecx,%0\n" - " addl %0,%%eax\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: xorl %%eax,%%eax\n" - " jmp 1b\n" - "3: movb $1,%%al\n" - " jmp 1b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,2b\n" - ".previous" - :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) - :"cc"); - return res & mask; -} diff --git a/xen/arch/i386/xen.lds b/xen/arch/i386/xen.lds deleted file mode 100644 index 5947ebada5..0000000000 --- a/xen/arch/i386/xen.lds +++ /dev/null @@ -1,87 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares ; - */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(start) -SECTIONS -{ - . = 0xFC400000 + 0x100000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.fixup) - *(.gnu.warning) - } = 0x9090 - .text.lock : { *(.text.lock) } /* out-of-line lock text */ - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - - __start___kallsyms = .; /* All kernel symbols */ - __kallsyms : { *(__kallsyms) } - __stop___kallsyms = .; - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . 
= ALIGN(16); - __setup_start = .; - .setup.init : { *(.setup.init) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { *(.initcall.init) } - __initcall_end = .; - . = ALIGN(4096); - __init_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.text.exit) - *(.data.exit) - *(.exitcall.exit) - } - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile new file mode 100644 index 0000000000..8257f479ad --- /dev/null +++ b/xen/arch/x86/Makefile @@ -0,0 +1,23 @@ + +include $(BASEDIR)/Rules.mk + +ifneq ($(debugger),y) +OBJS := $(subst pdb-linux.o,,$(OBJS)) +OBJS := $(subst pdb-stub.o,,$(OBJS)) +endif + +# What happens here? We link monitor object files together, starting +# at MONITOR_BASE (a very high address). But bootloader cannot put +# things there, so we initially load at LOAD_BASE. A hacky little +# tool called `elf-reloc' is used to modify segment offsets from +# MONITOR_BASE-relative to LOAD_BASE-relative. +# (NB. Linux gets round this by turning its image into raw binary, then +# wrapping that with a low-memory bootstrapper.) 
+default: boot/boot.o $(OBJS) + $(LD) -r -o arch.o $(OBJS) + $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET).dbg + objcopy -R .note -R .comment -S $(TARGET).dbg $(TARGET) + $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET) + +clean: + rm -f *.o *~ core boot/*.o boot/*~ boot/core diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk new file mode 100644 index 0000000000..ea108159b6 --- /dev/null +++ b/xen/arch/x86/Rules.mk @@ -0,0 +1,28 @@ +######################################## +# x86-specific definitions + +CC := gcc +LD := ld + +# Linker should relocate monitor to this address +MONITOR_BASE := 0xFC500000 + +# Bootloader should load monitor to this real address +LOAD_BASE := 0x00100000 + +CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -O3 +CFLAGS += -iwithprefix include -Wall -Werror -DMONITOR_BASE=$(MONITOR_BASE) +CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ +CFLAGS += -Wno-pointer-arith -Wredundant-decls -D$(TARGET_SUBARCH) + +LDFLAGS := -T xen.lds -N + +ifeq ($(TARGET_SUBARCH),x86_32) +CFLAGS += -m32 -march=i686 +LDARCHFLAGS := --oformat elf32-i386 +endif + +ifeq ($(TARGET_SUBARCH),x86_64) +CFLAGS += -m64 +LDARCHFLAGS := +endif diff --git a/xen/arch/x86/acpi.c b/xen/arch/x86/acpi.c new file mode 100644 index 0000000000..6672e406aa --- /dev/null +++ b/xen/arch/x86/acpi.c @@ -0,0 +1,672 @@ +/* + * acpi.c - Architecture-Specific Low-Level ACPI Support + * + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2001 Jun Nakajima + * Copyright (C) 2001 Patrick Mochel + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define PREFIX "ACPI: " + +int acpi_lapic = 0; +int acpi_ioapic = 0; + +/* -------------------------------------------------------------------------- + Boot-time Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_BOOT +int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ +int acpi_ht __initdata = 1; /* enable HT */ + +enum acpi_irq_model_id acpi_irq_model; + + +/* + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, + * to map the target physical address. The problem is that set_fixmap() + * provides a single page, and it is possible that the page is not + * sufficient. + * By using this area, we can map up to MAX_IO_APICS pages temporarily, + * i.e. until the next __va_range() call. + * + * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* + * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and + * count idx down while incrementing the phys address. 
+ */ +char *__acpi_map_table(unsigned long phys, unsigned long size) +{ + unsigned long base, offset, mapped_size; + int idx; + + if (phys + size < 8*1024*1024) + return __va(phys); + + offset = phys & (PAGE_SIZE - 1); + mapped_size = PAGE_SIZE - offset; + set_fixmap(FIX_ACPI_END, phys); + base = fix_to_virt(FIX_ACPI_END); + + /* + * Most cases can be covered by the below. + */ + idx = FIX_ACPI_END; + while (mapped_size < size) { + if (--idx < FIX_ACPI_BEGIN) + return 0; /* cannot handle this */ + phys += PAGE_SIZE; + set_fixmap(idx, phys); + mapped_size += PAGE_SIZE; + } + + return ((unsigned char *) base + offset); +} + + +#ifdef CONFIG_X86_LOCAL_APIC + +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; + + +static int __init +acpi_parse_madt ( + unsigned long phys_addr, + unsigned long size) +{ + struct acpi_table_madt *madt = NULL; + + if (!phys_addr || !size) + return -EINVAL; + + madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); + if (!madt) { + printk(KERN_WARNING PREFIX "Unable to map MADT\n"); + return -ENODEV; + } + + if (madt->lapic_address) + acpi_lapic_addr = (u64) madt->lapic_address; + + printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n", + madt->lapic_address); + + detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id); + + return 0; +} + + +static int __init +acpi_parse_lapic ( + acpi_table_entry_header *header) +{ + struct acpi_table_lapic *processor = NULL; + + processor = (struct acpi_table_lapic*) header; + if (!processor) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + mp_register_lapic ( + processor->id, /* APIC ID */ + processor->flags.enabled); /* Enabled? 
*/ + + return 0; +} + + +static int __init +acpi_parse_lapic_addr_ovr ( + acpi_table_entry_header *header) +{ + struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; + + lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; + if (!lapic_addr_ovr) + return -EINVAL; + + acpi_lapic_addr = lapic_addr_ovr->address; + + return 0; +} + +static int __init +acpi_parse_lapic_nmi ( + acpi_table_entry_header *header) +{ + struct acpi_table_lapic_nmi *lapic_nmi = NULL; + + lapic_nmi = (struct acpi_table_lapic_nmi*) header; + if (!lapic_nmi) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (lapic_nmi->lint != 1) + printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); + + return 0; +} + +#endif /*CONFIG_X86_LOCAL_APIC*/ + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) + +static int __init +acpi_parse_ioapic ( + acpi_table_entry_header *header) +{ + struct acpi_table_ioapic *ioapic = NULL; + + ioapic = (struct acpi_table_ioapic*) header; + if (!ioapic) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + mp_register_ioapic ( + ioapic->id, + ioapic->address, + ioapic->global_irq_base); + + return 0; +} + + +static int __init +acpi_parse_int_src_ovr ( + acpi_table_entry_header *header) +{ + struct acpi_table_int_src_ovr *intsrc = NULL; + + intsrc = (struct acpi_table_int_src_ovr*) header; + if (!intsrc) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + mp_override_legacy_irq ( + intsrc->bus_irq, + intsrc->flags.polarity, + intsrc->flags.trigger, + intsrc->global_irq); + + return 0; +} + + +static int __init +acpi_parse_nmi_src ( + acpi_table_entry_header *header) +{ + struct acpi_table_nmi_src *nmi_src = NULL; + + nmi_src = (struct acpi_table_nmi_src*) header; + if (!nmi_src) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* TBD: Support nimsrc entries? 
*/ + + return 0; +} + +#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ + + +static unsigned long __init +acpi_scan_rsdp ( + unsigned long start, + unsigned long length) +{ + unsigned long offset = 0; + unsigned long sig_len = sizeof("RSD PTR ") - 1; + + /* + * Scan all 16-byte boundaries of the physical memory region for the + * RSDP signature. + */ + for (offset = 0; offset < length; offset += 16) { + if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) + continue; + return (start + offset); + } + + return 0; +} + + +unsigned long __init +acpi_find_rsdp (void) +{ + unsigned long rsdp_phys = 0; + + /* + * Scan memory looking for the RSDP signature. First search EBDA (low + * memory) paragraphs and then search upper memory (E0000-FFFFF). + */ + rsdp_phys = acpi_scan_rsdp (0, 0x400); + if (!rsdp_phys) + rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); + + return rsdp_phys; +} + + +/* + * acpi_boot_init() + * called from setup_arch(), always. + * 1. maps ACPI tables for later use + * 2. enumerates lapics + * 3. enumerates io-apics + * + * side effects: + * acpi_lapic = 1 if LAPIC found + * acpi_ioapic = 1 if IOAPIC found + * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; + * if acpi_blacklisted() acpi_disabled = 1; + * acpi_irq_model=... + * ... + * + * return value: (currently ignored) + * 0: success + * !0: failure + */ +int __init +acpi_boot_init (void) +{ + int result = 0; + + if (acpi_disabled && !acpi_ht) + return(1); + + /* + * The default interrupt routing model is PIC (8259). This gets + * overriden if IOAPICs are enumerated (below). + */ + acpi_irq_model = ACPI_IRQ_MODEL_PIC; + + /* + * Initialize the ACPI boot-time table parser. 
+ */ + result = acpi_table_init(); + if (result) { + acpi_disabled = 1; + return result; + } + + result = acpi_blacklisted(); + if (result) { + printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n"); + acpi_disabled = 1; + return result; + } + +#ifdef CONFIG_X86_LOCAL_APIC + + /* + * MADT + * ---- + * Parse the Multiple APIC Description Table (MADT), if exists. + * Note that this table provides platform SMP configuration + * information -- the successor to MPS tables. + */ + + result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); + if (!result) { + return 0; + } + else if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing MADT\n"); + return result; + } + else if (result > 1) + printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n"); + + /* + * Local APIC + * ---------- + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). + */ + + result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr); + if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); + return result; + } + + mp_register_lapic_address(acpi_lapic_addr); + + result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic); + if (!result) { + printk(KERN_ERR PREFIX "No LAPIC entries present\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return -ENODEV; + } + else if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return result; + } + + result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi); + if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return result; + } + + acpi_lapic = 1; + +#endif /*CONFIG_X86_LOCAL_APIC*/ + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) + + /* + * I/O APIC + * -------- + */ + + /* + * ACPI interpreter is 
required to complete interrupt setup, + * so if it is off, don't enumerate the io-apics with ACPI. + * If MPS is present, it will handle them, + * otherwise the system will stay in PIC mode + */ + if (acpi_disabled || acpi_noirq) { + return 1; + } + + /* + * if "noapic" boot option, don't look for IO-APICs + */ + if (ioapic_setup_disabled()) { + printk(KERN_INFO PREFIX "Skipping IOAPIC probe " + "due to 'noapic' option.\n"); + return 1; + } + + + result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic); + if (!result) { + printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); + return -ENODEV; + } + else if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); + return result; + } + + /* Build a default routing table for legacy (ISA) interrupts. */ + mp_config_acpi_legacy_irqs(); + + result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr); + if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return result; + } + + result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src); + if (result < 0) { + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return result; + } + + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + + acpi_irq_balance_set(NULL); + + acpi_ioapic = 1; + + if (acpi_lapic && acpi_ioapic) + smp_found_config = 1; + +#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ + + return 0; +} + +#endif /*CONFIG_ACPI_BOOT*/ + +#ifdef CONFIG_ACPI_BUS +/* + * "acpi_pic_sci=level" (current default) + * programs the PIC-mode SCI to Level Trigger. 
+ * (NO-OP if the BIOS set Level Trigger already) + * + * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's + * it may require Edge Trigger -- use "acpi_pic_sci=edge" + * (NO-OP if the BIOS set Edge Trigger already) + * + * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers + * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. + * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) + * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) + */ + +static __initdata int acpi_pic_sci_trigger; /* 0: level, 1: edge */ + +void __init +acpi_pic_sci_set_trigger(unsigned int irq) +{ + unsigned char mask = 1 << (irq & 7); + unsigned int port = 0x4d0 + (irq >> 3); + unsigned char val = inb(port); + + + printk(PREFIX "IRQ%d SCI:", irq); + if (!(val & mask)) { + printk(" Edge"); + + if (!acpi_pic_sci_trigger) { + printk(" set to Level"); + outb(val | mask, port); + } + } else { + printk(" Level"); + + if (acpi_pic_sci_trigger) { + printk(" set to Edge"); + outb(val | mask, port); + } + } + printk(" Trigger.\n"); +} + +int __init +acpi_pic_sci_setup(char *str) +{ + while (str && *str) { + if (strncmp(str, "level", 5) == 0) + acpi_pic_sci_trigger = 0; /* force level trigger */ + if (strncmp(str, "edge", 4) == 0) + acpi_pic_sci_trigger = 1; /* force edge trigger */ + str = strchr(str, ','); + if (str) + str += strspn(str, ", \t"); + } + return 1; +} + +__setup("acpi_pic_sci=", acpi_pic_sci_setup); + +#endif /* CONFIG_ACPI_BUS */ + + + +/* -------------------------------------------------------------------------- + Low-Level Sleep Support + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_SLEEP + +#define DEBUG + +#ifdef DEBUG +#include +#endif + +/* address in low memory of the wakeup routine. 
*/ +unsigned long acpi_wakeup_address = 0; + +/* new page directory that we will be using */ +static pmd_t *pmd; + +/* saved page directory */ +static pmd_t saved_pmd; + +/* page which we'll use for the new page directory */ +static pte_t *ptep; + +extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); + +/* + * acpi_create_identity_pmd + * + * Create a new, identity mapped pmd. + * + * Do this by creating new page directory, and marking all the pages as R/W + * Then set it as the new Page Middle Directory. + * And, of course, flush the TLB so it takes effect. + * + * We save the address of the old one, for later restoration. + */ +static void acpi_create_identity_pmd (void) +{ + pgd_t *pgd; + int i; + + ptep = (pte_t*)__get_free_page(GFP_KERNEL); + + /* fill page with low mapping */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED)); + + pgd = pgd_offset(current->active_mm, 0); + pmd = pmd_alloc(current->mm,pgd, 0); + + /* save the old pmd */ + saved_pmd = *pmd; + + /* set the new one */ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep))); + + /* flush the TLB */ + local_flush_tlb(); +} + +/* + * acpi_restore_pmd + * + * Restore the old pmd saved by acpi_create_identity_pmd and + * free the page that said function alloc'd + */ +static void acpi_restore_pmd (void) +{ + set_pmd(pmd, saved_pmd); + local_flush_tlb(); + free_page((unsigned long)ptep); +} + +/** + * acpi_save_state_mem - save kernel state + * + * Create an identity mapped page table and copy the wakeup routine to + * low memory. 
+ */ +int acpi_save_state_mem (void) +{ + acpi_create_identity_pmd(); + acpi_copy_wakeup_routine(acpi_wakeup_address); + + return 0; +} + +/** + * acpi_save_state_disk - save kernel state to disk + * + */ +int acpi_save_state_disk (void) +{ + return 1; +} + +/* + * acpi_restore_state + */ +void acpi_restore_state_mem (void) +{ + acpi_restore_pmd(); +} + +/** + * acpi_reserve_bootmem - do _very_ early ACPI initialisation + * + * We allocate a page in low memory for the wakeup + * routine for when we come back from a sleep state. The + * runtime allocator allows specification of <16M pages, but not + * <1M pages. + */ +void __init acpi_reserve_bootmem(void) +{ + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); + printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address); +} + +void do_suspend_lowlevel_s4bios(int resume) +{ + if (!resume) { + save_processor_context(); + acpi_save_register_state((unsigned long)&&acpi_sleep_done); + acpi_enter_sleep_state_s4bios(); + return; + } +acpi_sleep_done: + restore_processor_context(); +} + + +#endif /*CONFIG_ACPI_SLEEP*/ + diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c new file mode 100644 index 0000000000..476c96926a --- /dev/null +++ b/xen/arch/x86/apic.c @@ -0,0 +1,830 @@ +/* + * Local APIC handling, local APIC timers + * + * (c) 1999, 2000 Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively. + * Maciej W. Rozycki : Various updates and fixes. + * Mikael Pettersson : Power Management for UP-APIC. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Using APIC to generate smp_local_timer_interrupt? 
*/ +int using_apic_timer = 0; + +static int enabled_via_apicbase; + +int get_maxlvt(void) +{ + unsigned int v, ver, maxlvt; + + v = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(v); + /* 82489DXs do not report # of LVT entries. */ + maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; + return maxlvt; +} + +void clear_local_APIC(void) +{ + int maxlvt; + unsigned long v; + + maxlvt = get_maxlvt(); + + /* + * Masking an LVT entry on a P6 can trigger a local APIC error + * if the vector is zero. Mask LVTERR first to prevent this. + */ + if (maxlvt >= 3) { + v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ + apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); + } + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* + * Clean APIC state for other OSs: + */ + apic_write_around(APIC_LVTT, APIC_LVT_MASKED); + apic_write_around(APIC_LVT0, APIC_LVT_MASKED); + apic_write_around(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); + v = GET_APIC_VERSION(apic_read(APIC_LVR)); + if (APIC_INTEGRATED(v)) { /* !82489DX */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } +} + +void __init connect_bsp_APIC(void) +{ + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. + * connect BSP's local APIC to INT and NMI lines. 
+ */ + printk("leaving PIC mode, enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +} + +void disconnect_bsp_APIC(void) +{ + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect + * only on certain older boards). Note that APIC + * interrupts, including IPIs, won't work beyond + * this point! The only exception are INIT IPIs. + */ + printk("disabling APIC mode, entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + } +} + +void disable_local_APIC(void) +{ + unsigned long value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write_around(APIC_SPIV, value); + + if (enabled_via_apicbase) { + unsigned int l, h; + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + Dprintk("Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + Dprintk("Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. 
+ */ + reg0 = apic_read(APIC_ID); + Dprintk("Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + Dprintk("Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* + * The next two are just to see if we have sane values. + * They're only really relevant if we're in Virtual Wire + * compatibility mode, but most boxes are anymore. + */ + reg0 = apic_read(APIC_LVT0); + Dprintk("Getting LVT0: %x\n", reg0); + reg1 = apic_read(APIC_LVT1); + Dprintk("Getting LVT1: %x\n", reg1); + + return 1; +} + +void __init sync_Arb_IDs(void) +{ + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + Dprintk("Synchronizing Arb IDs.\n"); + apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG + | APIC_DM_INIT); +} + +extern void __error_in_apic_c (void); + +/* + * WAS: An initial setup of the virtual wire mode. + * NOW: We don't bother doing anything. All we need at this point + * is to receive timer ticks, so that 'jiffies' is incremented. + * If we're SMP, then we can assume BIOS did setup for us. + * If we're UP, then the APIC should be disabled (it is at reset). + * If we're UP and APIC is enabled, then BIOS is clever and has + * probably done initial interrupt routing for us. + */ +void __init init_bsp_APIC(void) +{ +} + +static unsigned long calculate_ldr(unsigned long old) +{ + unsigned long id = 1UL << smp_processor_id(); + return (old & ~APIC_LDR_MASK)|SET_APIC_LOGICAL_ID(id); +} + +void __init setup_local_APIC (void) +{ + unsigned long value, ver, maxlvt; + + value = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(value); + + if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) + __error_in_apic_c(); + + /* Double-check wether this APIC is really registered. */ + if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map)) + BUG(); + + /* + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. 
"AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ + + /* + * In clustered apic mode, the firmware does this for us + * Put the APIC into flat delivery mode. + * Must be "all ones" explicitly for 82489DX. + */ + apic_write_around(APIC_DFR, APIC_DFR_FLAT); + + /* + * Set up the logical destination ID. + */ + value = apic_read(APIC_LDR); + apic_write_around(APIC_LDR, calculate_ldr(value)); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write_around(APIC_TASKPRI, value); + + /* + * Now that we are all set up, enable the APIC + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + /* + * Enable APIC + */ + value |= APIC_SPIV_APIC_ENABLED; + + /* Enable focus processor (bit==0) */ + value &= ~APIC_SPIV_FOCUS_DISABLED; + + /* Set spurious IRQ vector */ + value |= SPURIOUS_APIC_VECTOR; + apic_write_around(APIC_SPIV, value); + + /* + * Set up LVT0, LVT1: + * + * set up through-local-APIC on the BP's LINT0. This is not + * strictly necessery in pure symmetric-IO mode, but sometimes + * we delegate interrupts to the 8259A. + */ + /* + * TODO: set up through-local-APIC from through-I/O-APIC? --macro + */ + value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; + if (!smp_processor_id()) { + value = APIC_DM_EXTINT; + printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); + } else { + value = APIC_DM_EXTINT | APIC_LVT_MASKED; + printk("masked ExtINT on CPU#%d\n", smp_processor_id()); + } + apic_write_around(APIC_LVT0, value); + + /* + * only the BP should see the LINT1 NMI signal, obviously. 
+ */ + if (!smp_processor_id()) + value = APIC_DM_NMI; + else + value = APIC_DM_NMI | APIC_LVT_MASKED; + if (!APIC_INTEGRATED(ver)) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + apic_write_around(APIC_LVT1, value); + + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + maxlvt = get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + printk("ESR value before enabling vector: %08lx\n", value); + + value = ERROR_APIC_VECTOR; /* enables sending errors */ + apic_write_around(APIC_LVTERR, value); + /* spec says clear errors after enabling vector. */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + printk("ESR value after enabling vector: %08lx\n", value); + } else { + printk("No ESR for 82489DX.\n"); + } + + if ( (smp_processor_id() == 0) && (nmi_watchdog == NMI_LOCAL_APIC) ) + setup_apic_nmi_watchdog(); +} + + +static inline void apic_pm_init1(void) { } +static inline void apic_pm_init2(void) { } + + +/* + * Detect and enable local APICs on non-SMP boards. + * Original code written by Keir Fraser. + */ + +static int __init detect_init_APIC (void) +{ + u32 h, l, features; + extern void get_cpu_vendor(struct cpuinfo_x86*); + + /* Workaround for us being called before identify_cpu(). */ + get_cpu_vendor(&boot_cpu_data); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) + break; + if (boot_cpu_data.x86 == 15 && cpu_has_apic) + break; + goto no_apic; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 == 6 || + (boot_cpu_data.x86 == 15 && cpu_has_apic) || + (boot_cpu_data.x86 == 5 && cpu_has_apic)) + break; + goto no_apic; + default: + goto no_apic; + } + + if (!cpu_has_apic) { + /* + * Some BIOSes disable the local APIC in the + * APIC_BASE MSR. This can only be done in + * software for Intel P6 and AMD K7 (Model > 1). 
+ */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + printk("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + + /* The APIC feature bit should now be enabled in `cpuid' */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + printk("Could not enable APIC!\n"); + return -1; + } + + set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_physical_apicid = 0; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + if (nmi_watchdog != NMI_NONE) + nmi_watchdog = NMI_LOCAL_APIC; + + printk("Found and enabled local APIC!\n"); + apic_pm_init1(); + return 0; + + no_apic: + printk("No local APIC present or hardware disabled\n"); + return -1; +} + +void __init init_apic_mappings(void) +{ + unsigned long apic_phys = 0; + + /* + * If no local APIC can be found then set up a fake all zeroes page to + * simulate the local APIC and another one for the IO-APIC. + */ + if (!smp_found_config && detect_init_APIC()) { + apic_phys = get_free_page(GFP_KERNEL); + apic_phys = __pa(apic_phys); + } else + apic_phys = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). 
+ */ + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + +#ifdef CONFIG_X86_IO_APIC + { + unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0; + int i; + + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) + ioapic_phys = mp_ioapics[i].mpc_apicaddr; + set_fixmap_nocache(idx, ioapic_phys); + Dprintk("mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + } + } +#endif +} + +/***************************************************************************** + * APIC calibration + * + * The APIC is programmed in bus cycles. + * Timeout values should specified in real time units. + * The "cheapest" time source is the cyclecounter. + * + * Thus, we need a mappings from: bus cycles <- cycle counter <- system time + * + * The calibration is currently a bit shoddy since it requires the external + * timer chip to generate periodic timer interupts. + *****************************************************************************/ + +/* used for system time scaling */ +static unsigned int bus_freq; +static u32 bus_cycle; /* length of one bus cycle in pico-seconds */ +static u32 bus_scale; /* scaling factor convert ns to bus cycles */ + +/* + * The timer chip is already set up at HZ interrupts per second here, + * but we do not accept timer interrupts yet. We only allow the BP + * to calibrate. 
+ */ +static unsigned int __init get_8254_timer_count(void) +{ + /*extern spinlock_t i8253_lock;*/ + /*unsigned long flags;*/ + unsigned int count; + /*spin_lock_irqsave(&i8253_lock, flags);*/ + outb_p(0x00, 0x43); + count = inb_p(0x40); + count |= inb_p(0x40) << 8; + /*spin_unlock_irqrestore(&i8253_lock, flags);*/ + return count; +} + +void __init wait_8254_wraparound(void) +{ + unsigned int curr_count, prev_count=~0; + int delta; + curr_count = get_8254_timer_count(); + do { + prev_count = curr_count; + curr_count = get_8254_timer_count(); + delta = curr_count-prev_count; + /* + * This limit for delta seems arbitrary, but it isn't, it's slightly + * above the level of error a buggy Mercury/Neptune chipset timer can + * cause. + */ + } while (delta < 300); +} + +/* + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock. During calibration we actually call + * this function with a very large value and read the current time after + * a well defined period of time as expired. + * + * Calibration is only performed once, for CPU0! + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. + */ +#define APIC_DIVISOR 1 +static void __setup_APIC_LVTT(unsigned int clocks) +{ + unsigned int lvtt1_value, tmp_value; + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR; + apic_write_around(APIC_LVTT, lvtt1_value); + tmp_value = apic_read(APIC_TDCR); + apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1)); + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); +} + +/* + * this is done for every CPU from setup_APIC_clocks() below. + * We setup each local APIC with a zero timeout value for now. + * Unlike Linux, we don't have to wait for slices etc. 
+ */ +void setup_APIC_timer(void * data) +{ + unsigned long flags; + __save_flags(flags); + __sti(); + __setup_APIC_LVTT(0); + __restore_flags(flags); +} + +/* + * In this function we calibrate APIC bus clocks to the external timer. + * + * As a result we have the Bys Speed and CPU speed in Hz. + * + * We want to do the calibration only once (for CPU0). CPUs connected by the + * same APIC bus have the very same bus frequency. + * + * This bit is a bit shoddy since we use the very same periodic timer interrupt + * we try to eliminate to calibrate the APIC. + */ + +int __init calibrate_APIC_clock(void) +{ + unsigned long long t1 = 0, t2 = 0; + long tt1, tt2; + long result; + int i; + const int LOOPS = HZ/10; + + printk("Calibrating APIC timer for CPU%d...\n", smp_processor_id()); + + /* Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. */ + __setup_APIC_LVTT(1000000000); + + /* The timer chip counts down to zero. Let's wait + * for a wraparound to start exact measurement: + * (the current tick might have been already half done) */ + wait_8254_wraparound(); + + /* We wrapped around just now. Let's start: */ + rdtscll(t1); + tt1 = apic_read(APIC_TMCCT); + + /* Let's wait LOOPS wraprounds: */ + for (i = 0; i < LOOPS; i++) + wait_8254_wraparound(); + + tt2 = apic_read(APIC_TMCCT); + rdtscll(t2); + + /* The APIC bus clock counter is 32 bits only, it + * might have overflown, but note that we use signed + * longs, thus no extra care needed. + * underflown to be exact, as the timer counts down ;) */ + result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + + printk("..... CPU speed is %ld.%04ld MHz.\n", + ((long)(t2-t1)/LOOPS) / (1000000/HZ), + ((long)(t2-t1)/LOOPS) % (1000000/HZ)); + + printk("..... Bus speed is %ld.%04ld MHz.\n", + result / (1000000/HZ), + result % (1000000/HZ)); + + /* + * KAF: Moved this to time.c where it's calculated relative to the TSC. 
+ * Therefore works on machines with no local APIC. + */ + /*cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);*/ + + /* set up multipliers for accurate timer code */ + bus_freq = result*HZ; + bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */ + bus_scale = (1000*262144)/bus_cycle; + + printk("..... bus_scale = 0x%08X\n", bus_scale); + /* reset APIC to zero timeout value */ + __setup_APIC_LVTT(0); + return result; +} + +/* + * initialise the APIC timers for all CPUs + * we start with the first and find out processor frequency and bus speed + */ +void __init setup_APIC_clocks (void) +{ + printk("Using local APIC timer interrupts.\n"); + using_apic_timer = 1; + __cli(); + /* calibrate CPU0 for CPU speed and BUS speed */ + bus_freq = calibrate_APIC_clock(); + /* Now set up the timer for real. */ + setup_APIC_timer((void *)bus_freq); + __sti(); + /* and update all other cpus */ + smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1); +} + +#undef APIC_DIVISOR + +/* + * reprogram the APIC timer. Timeoutvalue is in ns from start of boot + * returns 1 on success + * returns 0 if the timeout value is too small or in the past. + */ +int reprogram_ac_timer(s_time_t timeout) +{ + s_time_t now; + s_time_t expire; + u64 apic_tmict; + + /* + * We use this value because we don't trust zero (we think it may just + * cause an immediate interrupt). At least this is guaranteed to hold it + * off for ages (esp. since the clock ticks on bus clock, not cpu clock!). + */ + if ( timeout == 0 ) + { + apic_tmict = 0xffffffff; + goto reprogram; + } + + now = NOW(); + expire = timeout - now; /* value from now */ + + if ( expire <= 0 ) + { + Dprintk("APICT[%02d] Timeout in the past 0x%08X%08X > 0x%08X%08X\n", + smp_processor_id(), (u32)(now>>32), + (u32)now, (u32)(timeout>>32),(u32)timeout); + return 0; + } + + /* + * If we don't have local APIC then we just poll the timer list off the + * PIT interrupt. Cheesy but good enough to work on eg. 
VMware :-) + */ + if ( !cpu_has_apic ) + return 1; + + /* conversion to bus units */ + apic_tmict = (((u64)bus_scale) * expire)>>18; + + if ( apic_tmict >= 0xffffffff ) + { + Dprintk("APICT[%02d] Timeout value too large\n", smp_processor_id()); + apic_tmict = 0xffffffff; + } + + if ( apic_tmict == 0 ) + { + Dprintk("APICT[%02d] timeout value too small\n", smp_processor_id()); + return 0; + } + + reprogram: + /* Program the timer. */ + apic_write(APIC_TMICT, (unsigned long)apic_tmict); + + return 1; +} + +unsigned int apic_timer_irqs [NR_CPUS]; + +void smp_apic_timer_interrupt(struct pt_regs * regs) +{ + int cpu = smp_processor_id(); + + ack_APIC_irq(); + + apic_timer_irqs[cpu]++; + perfc_incrc(apic_timer); + + __cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ); +} + +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) +{ + unsigned long v; + + /* + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. + */ + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); + + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk("spurious APIC interrupt on CPU#%d, should never happen.\n", + smp_processor_id()); +} + +/* + * This interrupt should never happen with our APIC/SMP architecture + */ + +asmlinkage void smp_error_interrupt(void) +{ + unsigned long v, v1; + + /* First tickle the hardware, only then report what went on. 
-- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); + + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk ("APIC error on CPU%d: %02lx(%02lx)\n", + smp_processor_id(), v , v1); +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int __init APIC_init_uniprocessor (void) +{ + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. + */ + if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) + { + printk("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + return -1; + } + + verify_local_APIC(); + + connect_bsp_APIC(); + +#ifdef CONFIG_SMP + cpu_online_map = 1; +#endif + phys_cpu_present_map = 1; + apic_write_around(APIC_ID, boot_cpu_physical_apicid); + + apic_pm_init2(); + + setup_local_APIC(); + +#ifdef CONFIG_X86_IO_APIC + if (smp_found_config && nr_ioapics) + setup_IO_APIC(); +#endif + setup_APIC_clocks(); + + return 0; +} diff --git a/xen/arch/x86/boot/boot.S b/xen/arch/x86/boot/boot.S new file mode 100644 index 0000000000..ebb74c6562 --- /dev/null +++ b/xen/arch/x86/boot/boot.S @@ -0,0 +1,249 @@ +#include +#include +#include + +#define SECONDARY_CPU_FLAG 0xA5A5A5A5 + + .text + +ENTRY(start) + jmp hal_entry + + .align 4 + +/*** MULTIBOOT HEADER ****/ + /* Magic number indicating a Multiboot header. */ + .long 0x1BADB002 + /* Flags to bootloader (see Multiboot spec). */ + .long 0x00000002 + /* Checksum: must be the negated sum of the first two fields. */ + .long -0x1BADB004 + +hal_entry: + /* Set up a few descriptors: on entry only CS is guaranteed good. 
*/ + lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET + mov $(__HYPERVISOR_DS),%ecx + mov %ecx,%ds + mov %ecx,%es + mov %ecx,%fs + mov %ecx,%gs + ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET +1: lss stack_start-__PAGE_OFFSET,%esp + + /* Reset EFLAGS (subsumes CLI and CLD). */ + pushl $0 + popf + + /* CPU type checks. We need P6+. */ + mov $0x200000,%edx + pushfl + pop %ecx + and %edx,%ecx + jne bad_cpu # ID bit should be clear + pushl %edx + popfl + pushfl + pop %ecx + and %edx,%ecx + je bad_cpu # ID bit should be set + + /* Set up CR0. */ + mov %cr0,%ecx + and $0x00000011,%ecx # save ET and PE + or $0x00050022,%ecx # set AM, WP, NE and MP + mov %ecx,%cr0 + + /* Set up FPU. */ + fninit + + /* Set up CR4, except global flag which Intel requires should be */ + /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */ + mov %cr4,%ecx + or mmu_cr4_features-__PAGE_OFFSET,%ecx + mov %ecx,mmu_cr4_features-__PAGE_OFFSET + and $0x7f,%ecx /* disable GLOBAL bit */ + mov %ecx,%cr4 + +#ifdef CONFIG_SMP + /* Is this a non-boot processor? */ + cmp $(SECONDARY_CPU_FLAG),%ebx + jne continue_boot_cpu + + call start_paging + lidt idt_descr + jmp start_secondary +#endif + +continue_boot_cpu: + add $__PAGE_OFFSET,%ebx + push %ebx /* Multiboot info struct */ + push %eax /* Multiboot magic value */ + + /* Initialize BSS (no nasty surprises!) 
*/ + mov $__bss_start-__PAGE_OFFSET,%edi + mov $_end-__PAGE_OFFSET,%ecx + sub %edi,%ecx + xor %eax,%eax + rep stosb + + /* Copy all modules (dom0 + initrd if present) out of the Xen heap */ + mov (%esp),%eax + cmp $0x2BADB002,%eax + jne skip_dom0_copy + sub $__PAGE_OFFSET,%ebx /* turn back into a phys addr */ + mov 0x14(%ebx),%edi /* mbi->mods_count */ + dec %edi /* mbi->mods_count-- */ + jb skip_dom0_copy /* skip if no modules */ + mov 0x18(%ebx),%eax /* mbi->mods_addr */ + mov (%eax),%ebx /* %ebx = mod[0]->mod_start */ + shl $4,%edi + add %edi,%eax + mov 0x4(%eax),%eax /* %eax = mod[mod_count-1]->end */ + mov %eax,%ecx + sub %ebx,%ecx /* %ecx = byte len of all mods */ + mov $(MAX_DIRECTMAP_ADDRESS), %edi + add %ecx, %edi /* %edi = src + length */ + shr $2,%ecx /* %ecx = length/4 */ +1: sub $4,%eax /* %eax = src, %edi = dst */ + sub $4,%edi + mov (%eax),%ebx + mov %ebx,(%edi) + loop 1b +skip_dom0_copy: + + /* Initialize low and high mappings of all memory with 4MB pages */ + mov $idle_pg_table-__PAGE_OFFSET,%edi + mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */ +1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */ + stosl /* low mapping */ + add $(1<physical mapping table. Ring 0 can access all memory. 
*/ +ENTRY(gdt_table) + .fill FIRST_RESERVED_GDT_ENTRY,8,0 + .quad 0x0000000000000000 /* unused */ + .quad 0x00cf9a000000ffff /* 0x0808 ring 0 4.00GB code at 0x0 */ + .quad 0x00cf92000000ffff /* 0x0810 ring 0 4.00GB data at 0x0 */ + .quad 0x00cfba000000c3ff /* 0x0819 ring 1 3.95GB code at 0x0 */ + .quad 0x00cfb2000000c3ff /* 0x0821 ring 1 3.95GB data at 0x0 */ + .quad 0x00cffa000000c3ff /* 0x082b ring 3 3.95GB code at 0x0 */ + .quad 0x00cff2000000c3ff /* 0x0833 ring 3 3.95GB data at 0x0 */ + .quad 0x0000000000000000 /* unused */ + .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ + + .org 0x1000 +ENTRY(idle_pg_table) # Initial page directory is 4kB + .org 0x2000 +ENTRY(cpu0_stack) # Initial stack is 8kB + .org 0x4000 +ENTRY(stext) +ENTRY(_stext) diff --git a/xen/arch/x86/delay.c b/xen/arch/x86/delay.c new file mode 100644 index 0000000000..cde5e18b5f --- /dev/null +++ b/xen/arch/x86/delay.c @@ -0,0 +1,29 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +#include +#include +#include +#include + +void __udelay(unsigned long usecs) +{ + unsigned long ticks = usecs * ticks_per_usec; + unsigned long s, e; + + rdtscl(s); + do + { + rep_nop(); + rdtscl(e); + } while ((e-s) < ticks); +} diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c new file mode 100644 index 0000000000..23b29a0c6c --- /dev/null +++ b/xen/arch/x86/domain_page.c @@ -0,0 +1,81 @@ +/****************************************************************************** + * domain_page.h + * + * Allow temporary mapping of domain pages. Based on ideas from the + * Linux PKMAP code -- the copyrights and credits are retained below. 
+ */ + +/* + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de * + * Copyright (C) 1999 Ingo Molnar + */ + +#include +#include +#include +#include +#include +#include + +unsigned long *mapcache; +static unsigned int map_idx, shadow_map_idx[NR_CPUS]; +static spinlock_t map_lock = SPIN_LOCK_UNLOCKED; + +/* Use a spare PTE bit to mark entries ready for recycling. */ +#define READY_FOR_TLB_FLUSH (1<<10) + +static void flush_all_ready_maps(void) +{ + unsigned long *cache = mapcache; + + /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */ + do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; } + while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 ); + + perfc_incrc(domain_page_tlb_flush); + local_flush_tlb(); +} + + +void *map_domain_mem(unsigned long pa) +{ + unsigned long va; + unsigned int idx, cpu = smp_processor_id(); + unsigned long *cache = mapcache; + unsigned long flags; + + perfc_incrc(map_domain_mem_count); + + spin_lock_irqsave(&map_lock, flags); + + /* Has some other CPU caused a wrap? We must flush if so. 
*/ + if ( map_idx < shadow_map_idx[cpu] ) + { + perfc_incrc(domain_page_tlb_flush); + local_flush_tlb(); + } + + for ( ; ; ) + { + idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1); + if ( idx == 0 ) flush_all_ready_maps(); + if ( cache[idx] == 0 ) break; + } + + cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR; + + spin_unlock_irqrestore(&map_lock, flags); + + shadow_map_idx[cpu] = idx; + + va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK); + return (void *)va; +} + +void unmap_domain_mem(void *va) +{ + unsigned int idx; + idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT; + mapcache[idx] |= READY_FOR_TLB_FLUSH; +} diff --git a/xen/arch/x86/entry.S b/xen/arch/x86/entry.S new file mode 100644 index 0000000000..f710ba51ad --- /dev/null +++ b/xen/arch/x86/entry.S @@ -0,0 +1,736 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * Stack layout in 'ret_from_system_call': + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - %fs + * 28(%esp) - %gs + * 2C(%esp) - orig_eax + * 30(%esp) - %eip + * 34(%esp) - %cs + * 38(%esp) - %eflags + * 3C(%esp) - %oldesp + * 40(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ +/* The idea for callbacks from monitor -> guest OS. + * + * First, we require that all callbacks (either via a supplied + * interrupt-descriptor-table, or via the special event or failsafe callbacks + * in the shared-info-structure) are to ring 1. This just makes life easier, + * in that it means we don't have to do messy GDT/LDT lookups to find + * out which the privilege-level of the return code-selector. 
That code + * would just be a hassle to write, and would need to account for running + * off the end of the GDT/LDT, for example. For all callbacks we check + * that the provided + * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as + * don't allow a guest OS to install ring-0 privileges into the GDT/LDT. + * It's up to the guest OS to ensure all returns via the IDT are to ring 1. + * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather + * than the correct ring) and bad things are bound to ensue -- IRET is + * likely to fault, and we may end up killing the domain (no harm can + * come to the hypervisor itself, though). + * + * When doing a callback, we check if the return CS is in ring 0. If so, + * callback is delayed until next return to ring != 0. + * If return CS is in ring 1, then we create a callback frame + * starting at return SS/ESP. The base of the frame does an intra-privilege + * interrupt-return. + * If return CS is in ring > 1, we create a callback frame starting + * at SS/ESP taken from appropriate section of the current TSS. The base + * of the frame does an inter-privilege interrupt-return. + * + * Note that the "failsafe callback" uses a special stackframe: + * { return_DS, return_ES, return_FS, return_GS, return_EIP, + * return_CS, return_EFLAGS[, return_ESP, return_SS] } + * That is, original values for DS/ES/FS/GS are placed on stack rather than + * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them + * saved/restored in guest OS. Furthermore, if we load them we may cause + * a fault if they are invalid, which is a hassle to deal with. We avoid + * that problem if we don't load them :-) This property allows us to use + * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS + * on return to ring != 0, we can simply package it up as a return via + * the failsafe callback, and let the guest OS sort it out (perhaps by + * killing an application process). 
Note that we also do this for any + * faulting IRET -- just let the guest OS handle it via the event + * callback. + * + * We terminate a domain in the following cases: + * - creating a callback stack frame (due to bad ring-1 stack). + * - faulting IRET on entry to failsafe callback handler. + * So, each domain must keep its ring-1 %ss/%esp and failsafe callback + * handler in good order (absolutely no faults allowed!). + */ + +#include +#include +#include + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +FS = 0x24 +GS = 0x28 +ORIG_EAX = 0x2C +EIP = 0x30 +CS = 0x34 +EFLAGS = 0x38 +OLDESP = 0x3C +OLDSS = 0x40 + +/* Offsets in task_struct */ +PROCESSOR = 0 +HYP_EVENTS = 2 +SHARED_INFO = 4 +EVENT_SEL = 8 +EVENT_ADDR = 12 +FAILSAFE_BUFFER = 16 +FAILSAFE_SEL = 32 +FAILSAFE_ADDR = 36 + +/* Offsets in shared_info_t */ +#define UPCALL_PENDING /* 0 */ +#define UPCALL_MASK 1 + +/* Offsets in guest_trap_bounce */ +GTB_ERROR_CODE = 0 +GTB_CR2 = 4 +GTB_FLAGS = 8 +GTB_CS = 10 +GTB_EIP = 12 +GTBF_TRAP = 1 +GTBF_TRAP_NOCODE = 2 +GTBF_TRAP_CR2 = 4 + +CF_MASK = 0x00000001 +IF_MASK = 0x00000200 +NT_MASK = 0x00004000 + + + +#define SAVE_ALL_NOSEGREGS \ + cld; \ + pushl %gs; \ + pushl %fs; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + +#define SAVE_ALL \ + SAVE_ALL_NOSEGREGS \ + movl $(__HYPERVISOR_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; \ + movl %edx,%fs; \ + movl %edx,%gs; \ + sti; + +#define GET_CURRENT(reg) \ + movl $4096-4, reg; \ + orl %esp, reg; \ + andl $~3,reg; \ + movl (reg),reg; + +ENTRY(continue_nonidle_task) + GET_CURRENT(%ebx) + jmp test_all_events + + ALIGN +/* + * HYPERVISOR_multicall(call_list, nr_calls) + * Execute a list of 'nr_calls' system calls, pointed at by 'call_list'. + * This is fairly easy except that: + * 1. We may fault reading the call list, and must patch that up; and + * 2. 
We cannot recursively call HYPERVISOR_multicall, or a malicious + * caller could cause our stack to blow up. + */ +do_multicall: + popl %eax + cmpl $SYMBOL_NAME(multicall_return_from_call),%eax + je multicall_return_from_call + pushl %ebx + movl 4(%esp),%ebx /* EBX == call_list */ + movl 8(%esp),%ecx /* ECX == nr_calls */ +multicall_loop: + pushl %ecx +multicall_fault1: + pushl 20(%ebx) # args[4] +multicall_fault2: + pushl 16(%ebx) # args[3] +multicall_fault3: + pushl 12(%ebx) # args[2] +multicall_fault4: + pushl 8(%ebx) # args[1] +multicall_fault5: + pushl 4(%ebx) # args[0] +multicall_fault6: + movl (%ebx),%eax # op + andl $255,%eax + call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) +multicall_return_from_call: +multicall_fault7: + movl %eax,24(%ebx) # args[5] == result + addl $20,%esp + popl %ecx + addl $(ARGS_PER_MULTICALL_ENTRY*4),%ebx + loop multicall_loop + popl %ebx + xorl %eax,%eax + jmp ret_from_hypervisor_call + +.section __ex_table,"a" + .align 4 + .long multicall_fault1, multicall_fixup1 + .long multicall_fault2, multicall_fixup2 + .long multicall_fault3, multicall_fixup3 + .long multicall_fault4, multicall_fixup4 + .long multicall_fault5, multicall_fixup5 + .long multicall_fault6, multicall_fixup6 +.previous + +.section .fixup,"ax" +multicall_fixup6: + addl $4,%esp +multicall_fixup5: + addl $4,%esp +multicall_fixup4: + addl $4,%esp +multicall_fixup3: + addl $4,%esp +multicall_fixup2: + addl $4,%esp +multicall_fixup1: + addl $4,%esp + popl %ebx + movl $-EFAULT,%eax + jmp ret_from_hypervisor_call +.previous + + ALIGN +restore_all_guest: + # First, may need to restore %ds if clobbered by create_bounce_frame + pushl %ss + popl %ds + # Second, create a failsafe copy of DS,ES,FS,GS in case any are bad + leal DS(%esp),%esi + leal FAILSAFE_BUFFER(%ebx),%edi + movsl + movsl + movsl + movsl + # Finally, restore guest registers -- faults will cause failsafe + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax +1: popl %ds +2: popl 
%es +3: popl %fs +4: popl %gs + addl $4,%esp +5: iret +.section .fixup,"ax" +10: subl $4,%esp + pushl %gs +9: pushl %fs +8: pushl %es +7: pushl %ds +6: pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + pushl %ss + popl %ds + pushl %ss + popl %es + jmp failsafe_callback +.previous +.section __ex_table,"a" + .align 4 + .long 1b,6b + .long 2b,7b + .long 3b,8b + .long 4b,9b + .long 5b,10b +.previous + +/* No special register assumptions */ +failsafe_callback: + GET_CURRENT(%ebx) + movzwl PROCESSOR(%ebx),%eax + shl $4,%eax + lea guest_trap_bounce(%eax),%edx + movl FAILSAFE_ADDR(%ebx),%eax + movl %eax,GTB_EIP(%edx) + movl FAILSAFE_SEL(%ebx),%eax + movw %ax,GTB_CS(%edx) + call create_bounce_frame + subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame + leal FAILSAFE_BUFFER(%ebx),%ebp + movl 0(%ebp),%eax # DS +FAULT1: movl %eax,(%esi) + movl 4(%ebp),%eax # ES +FAULT2: movl %eax,4(%esi) + movl 8(%ebp),%eax # FS +FAULT3: movl %eax,8(%esi) + movl 12(%ebp),%eax # GS +FAULT4: movl %eax,12(%esi) + movl %esi,OLDESP(%esp) + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $20,%esp # skip DS/ES/FS/GS/ORIG_EAX +FAULT5: iret + + + ALIGN +# Simple restore -- we should never fault as we we will only interrupt ring 0 +# when sane values have been placed in all registers. The only exception is +# NMI, which may interrupt before good values have been placed in DS-GS. +# The NMI return code deals with this problem itself. 
+restore_all_xen: + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $4,%esp + iret + + ALIGN +ENTRY(hypervisor_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + andl $255,%eax + call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) + +ret_from_hypervisor_call: + movl %eax,EAX(%esp) # save the return value + +test_all_events: + xorl %ecx,%ecx + notl %ecx + cli # tests must not race interrupts +/*test_softirqs:*/ + movzwl PROCESSOR(%ebx),%eax + shl $6,%eax # sizeof(irq_cpustat) == 64 + test %ecx,SYMBOL_NAME(irq_stat)(%eax,1) + jnz process_softirqs +/*test_hyp_events:*/ + testw %cx, HYP_EVENTS(%ebx) + jnz process_hyp_events +/*test_guest_events:*/ + movl SHARED_INFO(%ebx),%eax + testb $0xFF,UPCALL_MASK(%eax) + jnz restore_all_guest + testb $0xFF,UPCALL_PENDING(%eax) + jz restore_all_guest + movb $1,UPCALL_MASK(%eax) # Upcalls are masked during delivery +/*process_guest_events:*/ + movzwl PROCESSOR(%ebx),%edx + shl $4,%edx # sizeof(guest_trap_bounce) == 16 + lea guest_trap_bounce(%edx),%edx + movl EVENT_ADDR(%ebx),%eax + movl %eax,GTB_EIP(%edx) + movl EVENT_SEL(%ebx),%eax + movw %ax,GTB_CS(%edx) + call create_bounce_frame + jmp restore_all_guest + + ALIGN +process_softirqs: + sti + call SYMBOL_NAME(do_softirq) + jmp test_all_events + + ALIGN +process_hyp_events: + sti + call SYMBOL_NAME(do_hyp_events) + jmp test_all_events + +/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */ +/* {EIP, CS, EFLAGS, [ESP, SS]} */ +/* %edx == guest_trap_bounce, %ebx == task_struct */ +/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. 
*/ +create_bounce_frame: + mov CS+4(%esp),%cl + test $2,%cl + jz 1f /* jump if returning to an existing ring-1 activation */ + /* obtain ss/esp from TSS -- no current ring-1 activations */ + movzwl PROCESSOR(%ebx),%eax + /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */ + movl %eax, %ecx + shll $7, %ecx + shll $13, %eax + addl %ecx,%eax + addl $init_tss + 12,%eax + movl (%eax),%esi /* tss->esp1 */ +FAULT6: movl 4(%eax),%ds /* tss->ss1 */ + /* base of stack frame must contain ss/esp (inter-priv iret) */ + subl $8,%esi + movl OLDESP+4(%esp),%eax +FAULT7: movl %eax,(%esi) + movl OLDSS+4(%esp),%eax +FAULT8: movl %eax,4(%esi) + jmp 2f +1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */ + movl OLDESP+4(%esp),%esi +FAULT9: movl OLDSS+4(%esp),%ds +2: /* Construct a stack frame: EFLAGS, CS/EIP */ + subl $12,%esi + movl EIP+4(%esp),%eax +FAULT10:movl %eax,(%esi) + movl CS+4(%esp),%eax +FAULT11:movl %eax,4(%esi) + movl EFLAGS+4(%esp),%eax +FAULT12:movl %eax,8(%esi) + /* Rewrite our stack frame and return to ring 1. */ + /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. 
*/ + andl $0xfffcbeff,%eax + movl %eax,EFLAGS+4(%esp) + movl %ds,OLDSS+4(%esp) + movl %esi,OLDESP+4(%esp) + movzwl %es:GTB_CS(%edx),%eax + movl %eax,CS+4(%esp) + movl %es:GTB_EIP(%edx),%eax + movl %eax,EIP+4(%esp) + ret + + +.section __ex_table,"a" + .align 4 + .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret + .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector + .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector + .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack +.previous + +# This handler kills domains which experience unrecoverable faults. 
+.section .fixup,"ax" +crash_domain_fixup1: + subl $4,%esp + SAVE_ALL + jmp crash_domain +crash_domain_fixup2: + addl $4,%esp +crash_domain_fixup3: + pushl %ss + popl %ds + jmp crash_domain +.previous + + ALIGN +process_guest_exception_and_events: + movzwl PROCESSOR(%ebx),%eax + shl $4,%eax + lea guest_trap_bounce(%eax),%edx + testb $~0,GTB_FLAGS(%edx) + jz test_all_events + call create_bounce_frame # just the basic frame + mov %es:GTB_FLAGS(%edx),%cl + test $GTBF_TRAP_NOCODE,%cl + jnz 2f + subl $4,%esi # push error_code onto guest frame + movl %es:GTB_ERROR_CODE(%edx),%eax +FAULT13:movl %eax,(%esi) + test $GTBF_TRAP_CR2,%cl + jz 1f + subl $4,%esi # push %cr2 onto guest frame + movl %es:GTB_CR2(%edx),%eax +FAULT14:movl %eax,(%esi) +1: movl %esi,OLDESP(%esp) +2: push %es # unclobber %ds + pop %ds + movb $0,GTB_FLAGS(%edx) + jmp test_all_events + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) + movb CS(%esp),%al + testb $3,%al # return to non-supervisor? + jne test_all_events + jmp restore_all_xen + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + ALIGN +error_code: + pushl %fs + pushl %es + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %gs,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl GS(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, GS(%esp) + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + movl %edx,%fs + movl %edx,%gs + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + GET_CURRENT(%ebx) + call *%edi + addl $8,%esp + movb CS(%esp),%al + testb $3,%al + je restore_all_xen + jmp process_guest_exception_and_events + +ENTRY(coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_error) + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_simd_coprocessor_error) + jmp 
error_code + +ENTRY(device_not_available) + pushl $0 + pushl $SYMBOL_NAME(math_state_restore) + jmp error_code + +ENTRY(debug) + pushl $0 + pushl $ SYMBOL_NAME(do_debug) + jmp error_code + +ENTRY(int3) + pushl $0 + pushl $ SYMBOL_NAME(do_int3) + jmp error_code + +ENTRY(overflow) + pushl $0 + pushl $ SYMBOL_NAME(do_overflow) + jmp error_code + +ENTRY(bounds) + pushl $0 + pushl $ SYMBOL_NAME(do_bounds) + jmp error_code + +ENTRY(invalid_op) + pushl $0 + pushl $ SYMBOL_NAME(do_invalid_op) + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) + jmp error_code + +ENTRY(invalid_TSS) + pushl $ SYMBOL_NAME(do_invalid_TSS) + jmp error_code + +ENTRY(segment_not_present) + pushl $ SYMBOL_NAME(do_segment_not_present) + jmp error_code + +ENTRY(stack_segment) + pushl $ SYMBOL_NAME(do_stack_segment) + jmp error_code + +ENTRY(general_protection) + pushl $ SYMBOL_NAME(do_general_protection) + jmp error_code + +ENTRY(alignment_check) + pushl $ SYMBOL_NAME(do_alignment_check) + jmp error_code + +ENTRY(page_fault) + pushl $ SYMBOL_NAME(do_page_fault) + jmp error_code + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + +ENTRY(nmi) + # Save state but do not trash the segment registers! + # We may otherwise be unable to reload them or copy them to ring 1. + pushl %eax + SAVE_ALL_NOSEGREGS + + # Check for hardware problems. These are always fatal so we can + # reload DS and ES when handling them. + inb $0x61,%al + testb $0x80,%al + jne nmi_parity_err + testb $0x40,%al + jne nmi_io_err + movl %eax,%ebx + + # Okay, its almost a normal NMI tick. We can only process it if: + # A. We are the outermost Xen activation (in which case we have + # the selectors safely saved on our stack) + # B. DS-GS all contain sane Xen values. 
+ # In all other cases we bail without touching DS-GS, as we have + # interrupted an enclosing Xen activation in tricky prologue or + # epilogue code. + movb CS(%esp),%al + testb $3,%al + jne do_watchdog_tick + movl DS(%esp),%eax + cmpw $(__HYPERVISOR_DS),%ax + jne nmi_badseg + movl ES(%esp),%eax + cmpw $(__HYPERVISOR_DS),%ax + jne nmi_badseg + movl FS(%esp),%eax + cmpw $(__HYPERVISOR_DS),%ax + jne nmi_badseg + movl GS(%esp),%eax + cmpw $(__HYPERVISOR_DS),%ax + jne nmi_badseg + +do_watchdog_tick: + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + movl %esp,%edx + pushl %ebx # reason + pushl %edx # regs + call SYMBOL_NAME(do_nmi) + addl $8,%esp + movb CS(%esp),%al + testb $3,%al + je restore_all_xen + GET_CURRENT(%ebx) + jmp restore_all_guest + +nmi_badseg: + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $20,%esp + iret + +nmi_parity_err: + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + jmp SYMBOL_NAME(mem_parity_error) + +nmi_io_err: + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + jmp SYMBOL_NAME(io_check_error) + +.data +ENTRY(hypervisor_call_table) + .long SYMBOL_NAME(do_set_trap_table) /* 0 */ + .long SYMBOL_NAME(do_mmu_update) + .long SYMBOL_NAME(do_console_write) + .long SYMBOL_NAME(do_set_gdt) + .long SYMBOL_NAME(do_stack_switch) + .long SYMBOL_NAME(do_set_callbacks) /* 5 */ + .long SYMBOL_NAME(do_ni_syscall) # do_net_io_op + .long SYMBOL_NAME(do_fpu_taskswitch) + .long SYMBOL_NAME(do_sched_op) + .long SYMBOL_NAME(do_dom0_op) + .long SYMBOL_NAME(do_ni_syscall) /* 10 */ # do_network_op + .long SYMBOL_NAME(do_ni_syscall) # do_block_io_op + .long SYMBOL_NAME(do_set_debugreg) + .long SYMBOL_NAME(do_get_debugreg) + .long SYMBOL_NAME(do_update_descriptor) + .long SYMBOL_NAME(do_set_fast_trap) /* 15 */ + .long SYMBOL_NAME(do_dom_mem_op) + .long SYMBOL_NAME(do_multicall) + .long SYMBOL_NAME(do_kbd_op) + .long SYMBOL_NAME(do_update_va_mapping) + .long SYMBOL_NAME(do_set_timer_op) 
/* 20 */ + .long SYMBOL_NAME(do_event_channel_op) + .long SYMBOL_NAME(do_xen_version) + .long SYMBOL_NAME(do_console_io) + .long SYMBOL_NAME(do_physdev_op) + .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 25 */ + .rept NR_syscalls-((.-hypervisor_call_table)/4) + .long SYMBOL_NAME(do_ni_syscall) + .endr diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c new file mode 100644 index 0000000000..af37b86013 --- /dev/null +++ b/xen/arch/x86/extable.c @@ -0,0 +1,62 @@ +/* + * linux/arch/i386/mm/extable.c + */ + +#include +#include +#include +#include + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long +search_one_table(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + long diff; + + mid = (last - first) / 2 + first; + diff = mid->insn - value; + if (diff == 0) + return mid->fixup; + else if (diff < 0) + first = mid+1; + else + last = mid-1; + } + return 0; +} + +extern spinlock_t modlist_lock; + +unsigned long +search_exception_table(unsigned long addr) +{ + unsigned long ret = 0; + +#ifndef CONFIG_MODULES + /* There is only the kernel to search. */ + ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); + return ret; +#else + unsigned long flags; + /* The kernel is the last "module" -- no need to treat it special. 
*/ + struct module *mp; + + spin_lock_irqsave(&modlist_lock, flags); + for (mp = module_list; mp != NULL; mp = mp->next) { + if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING))) + continue; + ret = search_one_table(mp->ex_table_start, + mp->ex_table_end - 1, addr); + if (ret) + break; + } + spin_unlock_irqrestore(&modlist_lock, flags); + return ret; +#endif +} diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c new file mode 100644 index 0000000000..c53f52161c --- /dev/null +++ b/xen/arch/x86/flushtlb.c @@ -0,0 +1,40 @@ +/****************************************************************************** + * flushtlb.c + * + * TLB flushes are timestamped using a global virtual 'clock' which ticks + * on any TLB flush on any processor. + * + * Copyright (c) 2003, K A Fraser + */ + +#include +#include +#include +#include + +u32 tlbflush_clock; +u32 tlbflush_time[NR_CPUS]; + +void tlb_clocktick(void) +{ + u32 y, ny; + + /* Tick the clock. 'y' contains the current time after the tick. */ + ny = tlbflush_clock; + do { +#ifdef CONFIG_SMP + if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) ) + { + raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ); + y = tlbflush_clock; + break; + } +#else + y = ny+1; +#endif + } + while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) ); + + /* Update this CPU's timestamp to new time. 
*/ + tlbflush_time[smp_processor_id()] = y; +} diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c new file mode 100644 index 0000000000..34cd1c9b3b --- /dev/null +++ b/xen/arch/x86/i387.c @@ -0,0 +1,56 @@ +/* + * linux/arch/i386/kernel/i387.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include + +void init_fpu(void) +{ + __asm__("fninit"); + if ( cpu_has_xmm ) load_mxcsr(0x1f80); + set_bit(PF_DONEFPUINIT, ¤t->flags); +} + +static inline void __save_init_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxsave %0 ; fnclex" + : "=m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "fnsave %0 ; fwait" + : "=m" (tsk->thread.i387.fsave) ); + } + clear_bit(PF_USEDFPU, &tsk->flags); +} + +void save_init_fpu( struct task_struct *tsk ) +{ + /* + * The guest OS may have set the 'virtual STTS' flag. + * This causes us to set the real flag, so we'll need + * to temporarily clear it while saving f-p state. + */ + if ( test_bit(PF_GUEST_STTS, &tsk->flags) ) clts(); + __save_init_fpu(tsk); + stts(); +} + +void restore_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxrstor %0" + : : "m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "frstor %0" + : : "m" (tsk->thread.i387.fsave) ); + } +} diff --git a/xen/arch/x86/i8259.c b/xen/arch/x86/i8259.c new file mode 100644 index 0000000000..58ecb12553 --- /dev/null +++ b/xen/arch/x86/i8259.c @@ -0,0 +1,470 @@ +/****************************************************************************** + * i8259.c + * + * Well, this is required for SMP systems as well, as it build interrupt + * tables for IO APICS as well as uniprocessor 8259-alikes. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/* + * Common place to define all x86 IRQ vectors + * + * This builds up the IRQ handler stubs using some ugly macros in irq.h + * + * These macros create the low-level assembly IRQ routines that save + * register context and call do_IRQ(). do_IRQ() then does all the + * operations that are needed to keep the AT (or SMP IOAPIC) + * interrupt-controller happy. + */ + +BUILD_COMMON_IRQ() + +#define BI(x,y) \ + BUILD_IRQ(x##y) + +#define BUILD_16_IRQS(x) \ + BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ + BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ + BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ + BI(x,c) BI(x,d) BI(x,e) BI(x,f) + +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + BUILD_16_IRQS(0x0) + +#ifdef CONFIG_X86_IO_APIC +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. 
+ * + * (these are usually mapped into the 0x30-0xff vector range) + */ + BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) + BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) + BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) + BUILD_16_IRQS(0xc) +#endif + +#undef BUILD_16_IRQS +#undef BI + + +/* + * The following vectors are part of the Linux architecture, there + * is no hardware IRQ pin equivalent for them, they are triggered + * through the ICC by us (IPIs) + */ +#ifdef CONFIG_SMP + BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR) + BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) + BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) +#endif + +/* + * every pentium local APIC has two 'local interrupts', with a + * soft-definable vector attached to both interrupts, one of + * which is a timer interrupt, the other one is error counter + * overflow. Linux uses the local APIC timer interrupt to get + * a much simpler SMP time architecture: + */ +#ifdef CONFIG_X86_LOCAL_APIC + BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) + BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) + BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#endif + +#define IRQ(x,y) \ + IRQ##x##y##_interrupt + +#define IRQLIST_16(x) \ + IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ + IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ + IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ + IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) + + void (*interrupt[NR_IRQS])(void) = { + IRQLIST_16(0x0), + +#ifdef CONFIG_X86_IO_APIC + IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), + IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), + IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), + IRQLIST_16(0xc) +#endif + }; + +#undef IRQ +#undef IRQLIST_16 + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. 
+ * plus some generic x86 specific things if generic specifics makes + * any sense at all. + * this file should become arch/i386/kernel/irq.c when the old irq.c + * moves to arch independent land + */ + +spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; + +static void end_8259A_irq (unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + enable_8259A_irq(irq); +} + +#define shutdown_8259A_irq disable_8259A_irq + +void mask_and_ack_8259A(unsigned int); + +static unsigned int startup_8259A_irq(unsigned int irq) +{ + enable_8259A_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type i8259A_irq_type = { + "XT-PIC", + startup_8259A_irq, + shutdown_8259A_irq, + enable_8259A_irq, + disable_8259A_irq, + mask_and_ack_8259A, + end_8259A_irq, + NULL +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +static unsigned int cached_irq_mask = 0xffff; + +#define __byte(x,y) (((unsigned char *)&(y))[x]) +#define cached_21 (__byte(0,cached_irq_mask)) +#define cached_A1 (__byte(1,cached_irq_mask)) + +/* + * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. + * + * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. + * this 'mixed mode' IRQ handling costs nothing because it's only used + * at IRQ setup time. 
+ */ +unsigned long io_apic_irqs; + +void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +int i8259A_irq_pending(unsigned int irq) +{ + unsigned int mask = 1<> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; +} + +void make_8259A_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<> 8); + outb(0x0A,0xA0); /* back to the IRR register */ + return value; +} + +/* + * Careful! The 8259A is a fragile beast, it pretty + * much _has_ to be done exactly like this (mask it + * first, _then_ send the EOI, and the order of EOI + * to the two 8259s is important! + */ +void mask_and_ack_8259A(unsigned int irq) +{ + unsigned int irqmask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecesserily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... 
+ */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + + handle_real_irq: + if (irq & 8) { + inb(0xA1); /* DUMMY - (do we need this?) */ + outb(cached_A1,0xA1); + outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ + outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + } else { + inb(0x21); /* DUMMY - (do we need this?) */ + outb(cached_21,0x21); + outb(0x60+irq,0x20); /* 'Specific EOI' to master */ + } + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + + spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { + printk("spurious 8259A interrupt: IRQ%d.\n", irq); + spurious_irq_mask |= irqmask; + } + atomic_inc(&irq_err_count); + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; + } +} + +void __init init_8259A(int auto_eoi) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. 
+ */ + outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ + outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ + if (auto_eoi) + outb_p(0x03, 0x21); /* master does Auto EOI */ + else + outb_p(0x01, 0x21); /* master expects normal EOI */ + + outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ + outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ + outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode + is to be investigated) */ + + if (auto_eoi) + /* + * in AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_irq_type.ack = disable_8259A_irq; + else + i8259A_irq_type.ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_21, 0x21); /* restore master IRQ mask */ + outb(cached_A1, 0xA1); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ + +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; + +void __init init_ISA_irqs (void) +{ + int i; + +#ifdef CONFIG_X86_LOCAL_APIC + init_bsp_APIC(); +#endif + init_8259A(0); + + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = 0; + irq_desc[i].depth = 1; + + if (i < 16) { + /* + * 16 old-style INTA-cycle interrupts: + */ + irq_desc[i].handler = &i8259A_irq_type; + } else { + /* + * 'high' PCI IRQs filled in on demand + */ + irq_desc[i].handler = &no_irq_type; + } + } +} + +void __init init_IRQ(void) +{ + int i; + + init_ISA_irqs(); + + /* + * Cover the whole vector space, no vector can escape + * us. 
(some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < NR_IRQS; i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != HYPERVISOR_CALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + +#ifdef CONFIG_SMP + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt); + + /* IPI for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + + /* + * Set the clock to HZ Hz, we already have a valid + * vector now: + */ +#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */ +#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ) + outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(LATCH & 0xff , 0x40); /* LSB */ + outb(LATCH >> 8 , 0x40); /* MSB */ + + setup_irq(2, &irq2); +} + diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c new file mode 100644 index 0000000000..b956fdff40 --- /dev/null +++ b/xen/arch/x86/idle0_task.c @@ -0,0 +1,15 @@ +#include +#include +#include + +struct task_struct idle0_task = IDLE0_TASK(idle0_task); + +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. 
Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; + diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c new file mode 100644 index 0000000000..9c94e787e0 --- /dev/null +++ b/xen/arch/x86/io_apic.c @@ -0,0 +1,1944 @@ +/* + * Intel IO-APIC support for multi-Pentium hosts. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * + * Many thanks to Stig Venaas for trying out countless experimental + * patches and reporting/debugging problems patiently! + * + * (c) 1999, Multiple IO-APIC support, developed by + * Ken-ichi Yaku and + * Hidemi Kishimoto , + * further tested and cleaned up by Zach Brown + * and Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively + * Paul Diefenbaugh : Added full ACPI support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_IO_APIC + +#undef APIC_LOCKUP_DEBUG + +#define APIC_LOCKUP_DEBUG + +static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; + +unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL; +unsigned char int_delivery_mode = dest_LowestPrio; + + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +static struct irq_pin_list { + int apic, pin, next; +} irq_2_pin[PIN_MAP_SIZE]; + +/* + * The common case is 1:1 IRQ<->pin mappings. 
Sometimes there are + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. + */ +static void __init add_pin_to_irq(unsigned int irq, int apic, int pin) +{ + static int first_free_entry = NR_IRQS; + struct irq_pin_list *entry = irq_2_pin + irq; + + while (entry->next) + entry = irq_2_pin + entry->next; + + if (entry->pin != -1) { + entry->next = first_free_entry; + entry = irq_2_pin + entry->next; + if (++first_free_entry >= PIN_MAP_SIZE) + panic("io_apic.c: whoops"); + } + entry->apic = apic; + entry->pin = pin; +} + +/* + * Reroute an IRQ to a different pin. + */ +static void __init replace_pin_at_irq(unsigned int irq, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_pin_list *entry = irq_2_pin + irq; + + while (1) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + } + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_write(entry->apic, 0x10 + R + pin*2, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) +DO_ACTION( __edge, 0, &= 0xffff7fff, ) +DO_ACTION( __level, 0, |= 0x00008000, ) + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned 
int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ + spin_lock_irqsave(&ioapic_lock, flags); + *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); + *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (entry.delivery_mode == dest_SMI) + return; + + /* + * Disable it in the IO-APIC irq-routing table: + */ + memset(&entry, 0, sizeof(entry)); + entry.mask = 1; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void clear_IO_APIC (void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); +} + +static void set_ioapic_affinity (unsigned int irq, unsigned long mask) +{ + unsigned long flags; + + /* + * Only the first 8 bits are valid. + */ + mask = mask << 24; + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = mask, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#define balance_irq(_irq) ((void)0) + +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. 
+ */ + +#define MAX_PIRQS 8 +int pirq_entries [MAX_PIRQS]; +int pirqs_enabled; + +int skip_ioapic_setup; +#if 0 + +static int __init noioapic_setup(char *str) +{ + skip_ioapic_setup = 1; + return 1; +} + +__setup("noapic", noioapic_setup); + +static int __init ioapic_setup(char *str) +{ + skip_ioapic_setup = 0; + return 1; +} + +__setup("apic", ioapic_setup); + + + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); + +#endif + +/* + * Find the IRQ entry number of a certain pin. + */ +static int __init find_irq_entry(int apic, int pin, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == type && + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && + mp_irqs[i].mpc_dstirq == pin) + return i; + + return -1; +} + +/* + * Find the pin to which IRQ[irq] (ISA) is connected + */ +static int __init find_isa_irq_pin(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || + mp_bus_id_to_type[lbus] == MP_BUS_MCA) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + + return mp_irqs[i].mpc_dstirq; + } + return -1; +} + +/* + * Find a specific PCI IRQ entry. 
+ * Not an __init, possibly needed by modules + */ +static int pin_2_irq(int idx, int apic, int pin); + +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +{ + int apic, i, best_guess = -1; + + Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) { + printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + break; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && + !mp_irqs[i].mpc_irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + return irq; + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) + best_guess = irq; + } + } + return best_guess; +} + +/* + * EISA Edge/Level control register, ELCR + */ +static int __init EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_polarity(idx) (0) + +/* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. 
*/ + +#define default_ISA_trigger(idx) (0) +#define default_ISA_polarity(idx) (0) + +/* PCI interrupts are always polarity one level triggered, + * when listed as conforming in the MP table. */ + +#define default_PCI_trigger(idx) (1) +#define default_PCI_polarity(idx) (1) + +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) (0) + +static int __init MPBIOS_polarity(int idx) +{ + int bus = mp_irqs[idx].mpc_srcbus; + int polarity; + + /* + * Determine IRQ line polarity (high active or low active): + */ + switch (mp_irqs[idx].mpc_irqflag & 3) + { + case 0: /* conforms, ie. bus-type dependent polarity */ + { + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + { + polarity = default_ISA_polarity(idx); + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + polarity = default_EISA_polarity(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + polarity = default_PCI_polarity(idx); + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + polarity = default_MCA_polarity(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + break; + } + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + return polarity; +} + +static int __init MPBIOS_trigger(int idx) +{ + int bus = mp_irqs[idx].mpc_srcbus; + int trigger; + + /* + * Determine IRQ trigger mode (edge or level sensitive): + */ + switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + { + case 0: /* conforms, ie. 
bus-type dependent */ + { + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + { + trigger = default_ISA_trigger(idx); + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + trigger = default_PCI_trigger(idx); + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } + break; + } + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } + return trigger; +} + +static inline int irq_polarity(int idx) +{ + return MPBIOS_polarity(idx); +} + +static inline int irq_trigger(int idx) +{ + return MPBIOS_trigger(idx); +} + +static int pin_2_irq(int idx, int apic, int pin) +{ + int irq, i; + int bus = mp_irqs[idx].mpc_srcbus; + + /* + * Debugging check, we are in big trouble if this message pops up! + */ + if (mp_irqs[idx].mpc_dstirq != pin) + printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + case MP_BUS_EISA: + case MP_BUS_MCA: + { + irq = mp_irqs[idx].mpc_srcbusirq; + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* + * PCI IRQs are mapped in order + */ + i = irq = 0; + while (i < apic) + irq += nr_ioapic_registers[i++]; + irq += pin; + break; + } + default: + { + printk(KERN_ERR "unknown bus type %d.\n",bus); + irq = 0; + break; + } + } + + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. 
+ */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } + return irq; +} + +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic,pin,mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} + +int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; + +static int __init assign_irq_vector(int irq) +{ + static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + if (IO_APIC_VECTOR(irq) > 0) + return IO_APIC_VECTOR(irq); +next: + current_vector += 8; + + /* XXX Skip the guestOS -> Xen syscall vector! XXX */ + if (current_vector == HYPERVISOR_CALL_VECTOR) goto next; + /* XXX Skip the Linux/BSD fast-trap vector! XXX */ + if (current_vector == 0x80) goto next; + + if (current_vector > FIRST_SYSTEM_VECTOR) { + offset++; + current_vector = FIRST_DEVICE_VECTOR + offset; + } + + if (current_vector == FIRST_SYSTEM_VECTOR) + panic("ran out of interrupt sources!"); + + IO_APIC_VECTOR(irq) = current_vector; + return current_vector; +} + +extern void (*interrupt[NR_IRQS])(void); + +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. 
+ */ + +static unsigned int startup_edge_ioapic_irq(unsigned int irq); +#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq +#define enable_edge_ioapic_irq unmask_IO_APIC_irq +static void disable_edge_ioapic_irq (unsigned int irq); +static void ack_edge_ioapic_irq(unsigned int irq); +static void end_edge_ioapic_irq (unsigned int i); +static struct hw_interrupt_type ioapic_edge_irq_type = { + "IO-APIC-edge", + startup_edge_ioapic_irq, + shutdown_edge_ioapic_irq, + enable_edge_ioapic_irq, + disable_edge_ioapic_irq, + ack_edge_ioapic_irq, + end_edge_ioapic_irq, + set_ioapic_affinity, +}; + +static unsigned int startup_level_ioapic_irq (unsigned int irq); +#define shutdown_level_ioapic_irq mask_IO_APIC_irq +#define enable_level_ioapic_irq unmask_IO_APIC_irq +#define disable_level_ioapic_irq mask_IO_APIC_irq +static void mask_and_ack_level_ioapic_irq (unsigned int irq); +static void end_level_ioapic_irq (unsigned int irq); +static struct hw_interrupt_type ioapic_level_irq_type = { + "IO-APIC-level", + startup_level_ioapic_irq, + shutdown_level_ioapic_irq, + enable_level_ioapic_irq, + disable_level_ioapic_irq, + mask_and_ack_level_ioapic_irq, + end_level_ioapic_irq, + set_ioapic_affinity, +}; + +void __init setup_IO_APIC_irqs(void) +{ + struct IO_APIC_route_entry entry; + int apic, pin, idx, irq, first_notcon = 1, vector; + unsigned long flags; + + printk(KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = (INT_DEST_ADDR_MODE != 0); + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = target_cpus(); + + idx = find_irq_entry(apic,pin,mp_INT); + if (idx == -1) { + if (first_notcon) { + printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + first_notcon = 0; + } else + printk(", 
%d-%d", mp_ioapics[apic].mpc_apicid, pin); + continue; + } + + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); + + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; + } + + irq = pin_2_irq(idx, apic, pin); + /* + * skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum + */ + if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + && (apic != 0) && (irq == 0)) + continue; + else + add_pin_to_irq(irq, apic, pin); + + if (!apic && !IO_APIC_IRQ(irq)) + continue; + + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; + + if (IO_APIC_irq_trigger(irq)) + irq_desc[irq].handler = &ioapic_level_irq_type; + else + irq_desc[irq].handler = &ioapic_edge_irq_type; + + set_intr_gate(vector, interrupt[irq]); + + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + } + + if (!first_notcon) + printk(" not connected.\n"); +} + +/* + * Set up the 8259A-master output pin as broadcast to all + * CPUs. + */ +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + memset(&entry,0,sizeof(entry)); + + disable_8259A_irq(0); + + /* mask LVT0 */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = (INT_DEST_ADDR_MODE != 0); + entry.mask = 0; /* unmask IRQ now */ + entry.dest.logical.logical_dest = target_cpus(); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.polarity = 0; + entry.trigger = 0; + entry.vector = vector; + + /* + * The timer IRQ doesn't have to know that behind the + * scene we have a 8259A-master in AEOI mode ... 
+ */ + irq_desc[0].handler = &ioapic_edge_irq_type; + + /* + * Add it to the IO-APIC irq-routing table: + */ + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + enable_8259A_irq(0); +} + +void __init UNEXPECTED_IO_APIC(void) +{ + printk(KERN_WARNING + "An unexpected IO-APIC was found. If this kernel release is less than\n" + "three months old please report this to linux-smp@vger.kernel.org\n"); +} + +void __init print_IO_APIC(void) +{ +#ifndef NDEBUG + int apic, i; + struct IO_APIC_reg_00 reg_00; + struct IO_APIC_reg_01 reg_01; + struct IO_APIC_reg_02 reg_02; + struct IO_APIC_reg_03 reg_03; + unsigned long flags; + + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (i = 0; i < nr_ioapics; i++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + + /* + * We are a bit conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(apic, 0); + *(int *)®_01 = io_apic_read(apic, 1); + if (reg_01.version >= 0x10) + *(int *)®_02 = io_apic_read(apic, 2); + if (reg_01.version >= 0x20) + *(int *)®_03 = io_apic_read(apic, 3); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk("\n"); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)®_00); + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.delivery_type); + printk(KERN_DEBUG "....... 
: LTS : %X\n", reg_00.LTS); + if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2) + UNEXPECTED_IO_APIC(); + + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries); + if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */ + (reg_01.entries != 0x17) && /* typical ISA+PCI boards */ + (reg_01.entries != 0x1b) && /* Compaq Proliant boards */ + (reg_01.entries != 0x1f) && /* dual Xeon boards */ + (reg_01.entries != 0x22) && /* bigger Xeon boards */ + (reg_01.entries != 0x2E) && + (reg_01.entries != 0x3F) + ) + UNEXPECTED_IO_APIC(); + + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ); + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version); + if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ + (reg_01.version != 0x02) && /* VIA */ + (reg_01.version != 0x03) && /* later VIA */ + (reg_01.version != 0x10) && /* oldest IO-APICs */ + (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ + (reg_01.version != 0x13) && /* Xeon IO-APICs */ + (reg_01.version != 0x20) /* Intel P64H (82806 AA) */ + ) + UNEXPECTED_IO_APIC(); + if (reg_01.__reserved_1 || reg_01.__reserved_2) + UNEXPECTED_IO_APIC(); + + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.version >= 0x10 && *(int *)®_02 != *(int *)®_01) { + printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)®_02); + printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration); + if (reg_02.__reserved_1 || reg_02.__reserved_2) + UNEXPECTED_IO_APIC(); + } + + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. 
+ */ + if (reg_01.version >= 0x20 && *(int *)®_03 != *(int *)®_02 && + *(int *)®_03 != *(int *)®_01) { + printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)®_03); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT); + if (reg_03.__reserved_1) + UNEXPECTED_IO_APIC(); + } + + printk(KERN_DEBUG ".... IRQ redirection table:\n"); + + printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" + " Stat Dest Deli Vect: \n"); + + for (i = 0; i <= reg_01.entries; i++) { + struct IO_APIC_route_entry entry; + + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk(KERN_DEBUG " %02x %03X %02X ", + i, + entry.dest.logical.logical_dest, + entry.dest.physical.physical_dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector + ); + } + } + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for (i = 0; i < NR_IRQS; i++) { + struct irq_pin_list *entry = irq_2_pin + i; + if (entry->pin < 0) + continue; + printk(KERN_DEBUG "IRQ%d ", i); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } + printk("\n"); + } + + printk(KERN_INFO ".................................... done.\n"); +#endif +} + + +#if 0 /* Maybe useful for debugging, but not currently used anywhere. */ + +static void print_APIC_bitfield (int base) +{ + unsigned int v; + int i, j; + + printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } + + v = apic_read(APIC_ICR); + printk(KERN_DEBUG "... 
APIC ICR: %08x\n", v); + v = apic_read(APIC_ICR2); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + + v = apic_read(APIC_LVTT); + printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + printk("\n"); +} + +void print_all_local_APICs (void) +{ + smp_call_function(print_local_APIC, NULL, 1, 1); + print_local_APIC(NULL); +} + +void /*__init*/ print_PIC(void) +{ + extern spinlock_t i8259A_lock; + unsigned int v, flags; + + printk(KERN_DEBUG "\nprinting PIC contents\n"); + + spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + printk(KERN_DEBUG "... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + printk(KERN_DEBUG "... PIC IRR: %04x\n", v); + + outb(0x0b,0xa0); + outb(0x0b,0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); + + spin_unlock_irqrestore(&i8259A_lock, flags); + + printk(KERN_DEBUG "... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); +} + +#endif /* 0 */ + + +static void __init enable_IO_APIC(void) +{ + struct IO_APIC_reg_01 reg_01; + int i; + unsigned long flags; + + for (i = 0; i < PIN_MAP_SIZE; i++) { + irq_2_pin[i].pin = -1; + irq_2_pin[i].next = 0; + } + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (i = 0; i < nr_ioapics; i++) { + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_01 = io_apic_read(i, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[i] = reg_01.entries+1; + } + + /* + * Do not trust the IO-APIC being empty at bootup + */ + clear_IO_APIC(); +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ + /* + * Clear the IO-APIC before rebooting: + */ + clear_IO_APIC(); + + disconnect_bsp_APIC(); +} + +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc (void) +{ + struct IO_APIC_reg_00 reg_00; + unsigned long phys_id_present_map = phys_cpu_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + if (clustered_apic_mode) + /* We don't have a good way to do this yet - hack */ + phys_id_present_map = (u_long) 0xf; + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mpc_apicid; + + if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mpc_apicid); + printk(KERN_ERR "... fixing up to %d. 
(tell your hw vendor)\n", + reg_00.ID); + mp_ioapics[apic].mpc_apicid = reg_00.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs. + */ + if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) && + (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mpc_apicid); + for (i = 0; i < 0xf; i++) + if (!(phys_id_present_map & (1 << i))) + break; + if (i >= apic_broadcast_id) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + phys_id_present_map |= 1 << i; + mp_ioapics[apic].mpc_apicid = i; + } else { + printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid); + phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid; + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mpc_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_dstapic == old_id) + mp_irqs[i].mpc_dstapic + = mp_ioapics[apic].mpc_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. 
+ */ + printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mpc_apicid); + + reg_00.ID = mp_ioapics[apic].mpc_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0, *(int *)®_00); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.ID != mp_ioapics[apic].mpc_apicid) + panic("could not set ID!\n"); + else + printk(" ok.\n"); + } +} + +/* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. We do the following to work around the situation: + * + * - timer IRQ defaults to IO-APIC IRQ + * - if this function detects that timer IRQs are defunct, then we fall + * back to ISA timer IRQs + */ +static int __init timer_irq_works(void) +{ + unsigned int t1 = jiffies; + + sti(); + /* Let ten ticks pass... */ + mdelay((10 * 1000) / HZ); + + /* + * Expect a few ticks at least, to be sure some possible + * glue logic does not lock up after one or two first + * ticks in a non-ExtINT mode. Also the local APIC + * might have cached one ExtINT interrupt. Finally, at + * least one tick may be lost due to delays. + */ + if (jiffies - t1 > 4) + return 1; + + return 0; +} + +static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } + +/* + * Starting up a edge-triggered IO-APIC interrupt is + * nasty - we need to make sure that we get the edge. + * If it is already asserted for some reason, we need + * return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... 
+ */ + +static unsigned int startup_edge_ioapic_irq(unsigned int irq) +{ + int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + if (irq < 16) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; + } + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +} + +/* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ +static void ack_edge_ioapic_irq(unsigned int irq) +{ + balance_irq(irq); + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) + == (IRQ_PENDING | IRQ_DISABLED)) + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } + + +/* + * Level triggered interrupts can just be masked, + * and shutting down and starting up the interrupt + * is the same as enabling and disabling them -- except + * with a startup need to return a "was pending" value. + * + * Level triggered interrupts are special because we + * do not touch any IO-APIC register while handling + * them. We ack the APIC in the end-IRQ handler, not + * in the start-IRQ-handler. Protection against reentrance + * from the same interrupt is still provided, both by the + * generic IRQ layer and by the fact that an unacked local + * APIC does not accept IRQs. + */ +static unsigned int startup_level_ioapic_irq (unsigned int irq) +{ + unmask_IO_APIC_irq(irq); + + return 0; /* don't check for pending */ +} + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + unsigned long v; + int i; + + balance_irq(irq); + + mask_IO_APIC_irq(irq); + +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). 
Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = IO_APIC_VECTOR(irq); + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { +#ifdef APIC_LOCKUP_DEBUG + struct irq_pin_list *entry; +#endif + +#ifdef APIC_MISMATCH_DEBUG + atomic_inc(&irq_mis_count); +#endif + spin_lock(&ioapic_lock); + __edge_IO_APIC_irq(irq); +#ifdef APIC_LOCKUP_DEBUG + for (entry = irq_2_pin + irq;;) { + unsigned int reg; + + if (entry->pin == -1) + break; + reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2); + if (reg & 0x00004000) + printk(KERN_CRIT "Aieee!!! Remote IRR" + " still set after unlock!\n"); + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +#endif + __level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} + +static void end_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} + +static inline void init_IO_APIC_traps(void) +{ + int irq; + + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. 
+ * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for (irq = 0; irq < NR_IRQS ; irq++) { + if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ + if (irq < 16) + make_8259A_irq(irq); + else + /* Strange. Oh, well.. */ + irq_desc[irq].handler = &no_irq_type; + } + } +} + +static void enable_lapic_irq (unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +} + +static void disable_lapic_irq (unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); +} + +static void ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +static void end_lapic_irq (unsigned int i) { /* nothing */ } + +static struct hw_interrupt_type lapic_irq_type = { + "local-APIC-edge", + NULL, /* startup_irq() not used for IRQ0 */ + NULL, /* shutdown_irq() not used for IRQ0 */ + enable_lapic_irq, + disable_lapic_irq, + ack_lapic_irq, + end_lapic_irq +}; + +/* + * This looks a bit hackish but it's about the only one way of sending + * a few INTA cycles to 8259As and any associated glue logic. ICR does + * not support the ExtINT mode, unfortunately. We need to send these + * cycles as some i82489DX-based boards have glue logic that keeps the + * 8259A interrupt line asserted until INTA. 
--macro + */ +static inline void unlock_ExtINT_logic(void) +{ + int pin, i; + struct IO_APIC_route_entry entry0, entry1; + unsigned char save_control, save_freq_select; + unsigned long flags; + + pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) + return; + + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + clear_IO_APIC_pin(0, pin); + + memset(&entry1, 0, sizeof(entry1)); + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ + entry1.dest.physical.physical_dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; + entry1.vector = 0; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + save_control = CMOS_READ(RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, + RTC_FREQ_SELECT); + CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); + + i = 100; + while (i-- > 0) { + mdelay(10); + if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) + i -= 10; + } + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + clear_IO_APIC_pin(0, pin); + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +/* + * This code may look a bit paranoid, but it's supposed to cooperate with + * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast + * fanatically on his truly buggy board. 
+ */ +static inline void check_timer(void) +{ + extern int timer_ack; + int pin1, pin2; + int vector; + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); + vector = assign_irq_vector(0); + set_intr_gate(vector, interrupt[0]); + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI + * mode for the 8259A whenever interrupts are routed + * through I/O APICs. Also IRQ0 has to be enabled in + * the 8259A which implies the virtual wire has to be + * disabled in the local APIC. + */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + init_8259A(1); + timer_ack = 1; + enable_8259A_irq(0); + + pin1 = find_isa_irq_pin(0, mp_INT); + pin2 = find_isa_irq_pin(0, mp_ExtINT); + + printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); + + if (pin1 != -1) { + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + unmask_IO_APIC_irq(0); + if (timer_irq_works()) + return; + clear_IO_APIC_pin(0, pin1); + printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + } + + printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); + if (pin2 != -1) { + printk("\n..... (found pin %d) ...", pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + setup_ExtINT_IRQ0_pin(pin2, vector); + if (timer_irq_works()) { + printk("works.\n"); + if (pin1 != -1) + replace_pin_at_irq(0, 0, pin1, 0, pin2); + else + add_pin_to_irq(0, 0, pin2); + return; + } + /* + * Cleanup, just in case ... 
+ */ + clear_IO_APIC_pin(0, pin2); + } + printk(" failed.\n"); + + printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); + + disable_8259A_irq(0); + irq_desc[0].handler = &lapic_irq_type; + apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + printk(" works.\n"); + return; + } + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + printk(" failed.\n"); + + printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); + + init_8259A(0); + make_8259A_irq(0); + apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + + unlock_ExtINT_logic(); + + if (timer_irq_works()) { + printk(" works.\n"); + return; + } + printk(" failed :(.\n"); + panic("IO-APIC + timer doesn't work! pester mingo@redhat.com"); +} + +/* + * + * IRQ's that are handled by the old PIC in all cases: + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. + * Linux doesn't really care, as it's not actually used + * for any interrupt handling anyway. + * - There used to be IRQ13 here as well, but all + * MPS-compliant must not use it for FPU coupling and we + * want to use exception 16 anyway. And there are + * systems who connect it to an I/O APIC for other uses. + * Thus we don't mark it special any longer. + * + * Additionally, something is definitely wrong with irq9 + * on PIIX4 boards. + */ +#define PIC_IRQS (1<<2) + +void __init setup_IO_APIC(void) +{ + enable_IO_APIC(); + + io_apic_irqs = ~PIC_IRQS; + printk("ENABLING IO-APIC IRQs\n"); + + /* + * Set up IO-APIC IRQ routing. 
+ */ + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); + sync_Arb_IDs(); + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); + check_timer(); + if (!acpi_ioapic) + print_IO_APIC(); +} + +#endif /* CONFIG_X86_IO_APIC */ + + + +/* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_BOOT + +#define IO_APIC_MAX_ID 15 + +int __init io_apic_get_unique_id (int ioapic, int apic_id) +{ + struct IO_APIC_reg_00 reg_00; + static unsigned long apic_id_map = 0; + unsigned long flags; + int i = 0; + + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (!apic_id_map) + apic_id_map = phys_cpu_present_map; + + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= IO_APIC_MAX_ID) { + printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.ID); + apic_id = reg_00.ID; + } + + /* XAPICs do not need unique IDs */ + if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){ + printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", + ioapic, apic_id); + return apic_id; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. 
+ */ + if (apic_id_map & (1 << apic_id)) { + + for (i = 0; i < IO_APIC_MAX_ID; i++) { + if (!(apic_id_map & (1 << i))) + break; + } + + if (i == IO_APIC_MAX_ID) + panic("Max apic_id exceeded!\n"); + + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + apic_id_map |= (1 << apic_id); + + if (reg_00.ID != apic_id) { + reg_00.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, *(int *)®_00); + *(int *)®_00 = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.ID != apic_id) + panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + } + + printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + + return apic_id; +} + + +int __init io_apic_get_version (int ioapic) +{ + struct IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_01 = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.version; +} + + +int __init io_apic_get_redir_entries (int ioapic) +{ + struct IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_01 = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.entries; +} + + +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + if (!IO_APIC_IRQ(irq)) { + printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n", + ioapic); + return -EINVAL; + } + + /* + * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. + * Note that we mask (disable) IRQs now -- these get enabled when the + * corresponding device driver registers for this IRQ. 
+ */ + + memset(&entry,0,sizeof(entry)); + + entry.delivery_mode = dest_LowestPrio; + entry.dest_mode = INT_DELIVERY_MODE; + entry.dest.logical.logical_dest = target_cpus(); + entry.mask = 1; /* Disabled (masked) */ + entry.trigger = edge_level; + entry.polarity = active_high_low; + + add_pin_to_irq(irq, ioapic, pin); + + entry.vector = assign_irq_vector(irq); + + printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", ioapic, + mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); + + if (edge_level) { + irq_desc[irq].handler = &ioapic_level_irq_type; + } else { + irq_desc[irq].handler = &ioapic_edge_irq_type; + } + + set_intr_gate(entry.vector, interrupt[irq]); + + if (!ioapic && (irq < 16)) + disable_8259A_irq(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +} + +#endif /*CONFIG_ACPI_BOOT*/ + +extern char opt_leveltrigger[], opt_edgetrigger[]; + +static int __init ioapic_trigger_setup(void) +{ + char *p; + irq_desc_t *desc; + long irq; + + p = opt_leveltrigger; + while ( *p != '\0' ) + { + irq = simple_strtol(p, &p, 10); + if ( (irq <= 0) || (irq >= NR_IRQS) ) + { + printk("IRQ '%ld' out of range in level-trigger list '%s'\n", + irq, opt_leveltrigger); + break; + } + + printk("Forcing IRQ %ld to level-trigger: ", irq); + + desc = &irq_desc[irq]; + spin_lock_irq(&desc->lock); + + if ( desc->handler == &ioapic_level_irq_type ) + { + printk("already level-triggered (no force applied).\n"); + } + else if ( desc->handler != &ioapic_edge_irq_type ) + { + printk("cannot force (can only force IO-APIC-edge IRQs).\n"); + } + else + { + desc->handler = &ioapic_level_irq_type; + __mask_IO_APIC_irq(irq); + __level_IO_APIC_irq(irq); + printk("done.\n"); + } + + spin_unlock_irq(&desc->lock); + + if ( *p == '\0' ) + 
break; + + if ( *p != ',' ) + { + printk("Unexpected character '%c' in level-trigger list '%s'\n", + *p, opt_leveltrigger); + break; + } + + p++; + } + + p = opt_edgetrigger; + while ( *p != '\0' ) + { + irq = simple_strtol(p, &p, 10); + if ( (irq <= 0) || (irq >= NR_IRQS) ) + { + printk("IRQ '%ld' out of range in edge-trigger list '%s'\n", + irq, opt_edgetrigger); + break; + } + + printk("Forcing IRQ %ld to edge-trigger: ", irq); + + desc = &irq_desc[irq]; + spin_lock_irq(&desc->lock); + + if ( desc->handler == &ioapic_edge_irq_type ) + { + printk("already edge-triggered (no force applied).\n"); + } + else if ( desc->handler != &ioapic_level_irq_type ) + { + printk("cannot force (can only force IO-APIC-level IRQs).\n"); + } + else + { + desc->handler = &ioapic_edge_irq_type; + __edge_IO_APIC_irq(irq); + desc->status |= IRQ_PENDING; /* may have lost a masked edge */ + printk("done.\n"); + } + + spin_unlock_irq(&desc->lock); + + if ( *p == '\0' ) + break; + + if ( *p != ',' ) + { + printk("Unexpected character '%c' in edge-trigger list '%s'\n", + *p, opt_edgetrigger); + break; + } + + p++; + } + + return 0; +} + +__initcall(ioapic_trigger_setup); diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c new file mode 100644 index 0000000000..f3daf15305 --- /dev/null +++ b/xen/arch/x86/irq.c @@ -0,0 +1,1100 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setup_irqs with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the apropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = +{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +#ifdef CONFIG_SMP +/* NB. XXX We'll want some way of fiddling with this from DOM0. */ +unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; +#endif + +static void __do_IRQ_guest(int irq); + +/* + * Special irq handlers. + */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesnt deserve + * a generic callback i think. 
+ */ +#if CONFIG_X86 + printk("unexpected IRQ trap at vector %02x\n", irq); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + */ + ack_APIC_irq(); +#endif +#endif +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +/* + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + */ + +#ifdef CONFIG_SMP +unsigned char global_irq_holder = 0xff; +unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ + +#define MAXCOUNT 100000000 + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. Set this define to 0 if you + * want to check whether your system freezes. 
I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) + break; + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + if (irqs_running()) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. + */ +void synchronize_irq(void) +{ + if (irqs_running()) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + rep_nop(); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. 
+ */ + global_irq_holder = cpu; +} + +#define EFLAGS_IF_SHIFT 9 + +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). + */ +void __global_cli(void) +{ + unsigned int flags; + + __save_flags(flags); + if (flags & (1 << EFLAGS_IF_SHIFT)) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count(cpu)) + get_irqlock(cpu); + } +} + +void __global_sti(void) +{ + int cpu = smp_processor_id(); + + if (!local_irq_count(cpu)) + release_irqlock(cpu); + __sti(); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + int retval; + int local_enabled; + unsigned long flags; + int cpu = smp_processor_id(); + + __save_flags(flags); + local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count(cpu)) { + if (local_enabled) + retval = 1; + if (global_irq_holder == cpu) + retval = 0; + } + return retval; +} + +void __global_restore_flags(unsigned long flags) +{ + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } +} + +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. 
+ */ +static int handle_IRQ_event(unsigned int irq, + struct pt_regs * regs, + struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. 
+ */ + +void disable_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + cpu_relax(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +asmlinkage unsigned int do_IRQ(struct pt_regs regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. 
(or is disabled) + */ + int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */ + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; + +#ifdef PERF_COUNTERS + int cpu = smp_processor_id(); + u32 cc_start, cc_end; + + perfc_incra(irqs, cpu); + rdtscl(cc_start); +#endif + + spin_lock(&desc->lock); + desc->handler->ack(irq); + + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* We hook off guest-bound IRQs for special handling. */ + if ( status & IRQ_GUEST ) + { + __do_IRQ_guest(irq); + spin_unlock(&desc->lock); + return 1; + } + + /* + * If the IRQ is disabled for whatever reason, we cannot use the action we + * have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. Since we set + * PENDING, if another processor is handling a different instance of this + * same irq, the other processor will take care of it. + */ + if (!action) + goto out; + + /* + * Edge triggered interrupts need to remember pending events. This applies + * to any hw interrupts that allow a second instance of the same irq to + * arrive while we are in do_IRQ or in the handler. But the code here only + * handles the _second_ instance of the irq, not the third or fourth. So + * it is mostly useful for irq hardware that does not mask cleanly in an + * SMP environment. 
+ */ + for (;;) { + spin_unlock(&desc->lock); + handle_IRQ_event(irq, ®s, action); + spin_lock(&desc->lock); + + if (!(desc->status & IRQ_PENDING)) + break; + desc->status &= ~IRQ_PENDING; + } + desc->status &= ~IRQ_INPROGRESS; + out: + /* + * The ->end() handler has to deal with interrupts which got disabled + * while the handler was running. + */ + desc->handler->end(irq); + spin_unlock(&desc->lock); + +#ifdef PERF_COUNTERS + rdtscl(cc_end); + + if ( !action || (!(action->flags & SA_NOPROFILE)) ) + { + perfc_adda(irq_time, cpu, cc_end - cc_start); +#ifndef NDEBUG + if ( (cc_end - cc_start) > (cpu_khz * 100) ) + printk("Long interrupt %08x -> %08x\n", cc_start, cc_end); +#endif + } +#endif + + return 1; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. 
+ * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + */ + +int request_irq(unsigned int irq, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = (struct irqaction *) + kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + kfree(action); + + return retval; +} + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function may be called from interrupt context. + * + * Bugs: Attempting to free an irq in a handler for the same irq hangs + * the machine. 
+ */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) { + barrier(); + cpu_relax(); + } +#endif + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED; + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long s=0, e=0; + + spin_lock(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger (20ms delay). 
*/ + rdtscl(s); + do { + synchronize_irq(); + rdtscl(e); + } while ( ((e-s)/ticks_per_usec) < 20000 ); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; + } + spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger (100ms delay). + */ + rdtscl(s); + do { + synchronize_irq(); + rdtscl(e); + } while ( ((e-s)/ticks_per_usec) < 100000 ); + + /* + * Now filter out any obviously spurious interrupts + */ + val = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; + } + spin_unlock_irq(&desc->lock); + } + + return val; +} + +/* + * Return a mask of triggered interrupts (this + * can handle only legacy ISA interrupts). + */ + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the ISA bus interrupt lines and return a bitmap of + * active interrupts. The interrupt probe logic state is then + * returned to its previous value. + * + * Note: we need to scan all the irq's even though we will + * only return ISA irq numbers - just so that we reset them + * all to a known state. 
+ */ +unsigned int probe_irq_mask(unsigned long val) +{ + int i; + unsigned int mask; + + mask = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (i < 16 && !(status & IRQ_WAITING)) + mask |= 1 << i; + + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + spin_unlock(&probe_sem); + + return mask & val; +} + +/* + * Return the one interrupt that triggered (this can + * handle any interrupt source). + */ + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldnt happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. 
+ */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + spin_unlock(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + + if ( desc->status & IRQ_GUEST ) + { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->handler->startup(irq); + } + + spin_unlock_irqrestore(&desc->lock,flags); + + return 0; +} + + + +/* + * HANDLING OF GUEST-BOUND PHYSICAL IRQS + */ + +#define IRQ_MAX_GUESTS 7 +typedef struct { + u8 nr_guests; + u8 in_flight; + u8 shareable; + struct task_struct *guest[IRQ_MAX_GUESTS]; +} irq_guest_action_t; + +static void __do_IRQ_guest(int irq) +{ + irq_desc_t *desc = &irq_desc[irq]; + irq_guest_action_t *action = (irq_guest_action_t *)desc->action; + struct task_struct *p; + int i; + + for ( i 
= 0; i < action->nr_guests; i++ ) + { + p = action->guest[i]; + if ( !test_and_set_bit(irq, &p->pirq_mask) ) + action->in_flight++; + send_guest_pirq(p, irq); + } +} + +int pirq_guest_unmask(struct task_struct *p) +{ + irq_desc_t *desc; + int i, j, pirq; + u32 m; + shared_info_t *s = p->shared_info; + + for ( i = 0; i < 2; i++ ) + { + m = p->pirq_mask[i]; + while ( (j = ffs(m)) != 0 ) + { + m &= ~(1 << --j); + pirq = (i << 5) + j; + desc = &irq_desc[pirq]; + spin_lock_irq(&desc->lock); + if ( !test_bit(p->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && + test_and_clear_bit(pirq, &p->pirq_mask) && + (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) + desc->handler->end(pirq); + spin_unlock_irq(&desc->lock); + } + } + + return 0; +} + +int pirq_guest_bind(struct task_struct *p, int irq, int will_share) +{ + unsigned long flags; + irq_desc_t *desc = &irq_desc[irq]; + irq_guest_action_t *action; + int rc = 0; + + if ( !IS_CAPABLE_PHYSDEV(p) ) + return -EPERM; + + spin_lock_irqsave(&desc->lock, flags); + + action = (irq_guest_action_t *)desc->action; + + if ( !(desc->status & IRQ_GUEST) ) + { + if ( desc->action != NULL ) + { + DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n", + irq, desc->action->name); + rc = -EBUSY; + goto out; + } + + action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL); + if ( (desc->action = (struct irqaction *)action) == NULL ) + { + DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq); + rc = -ENOMEM; + goto out; + } + + action->nr_guests = 0; + action->in_flight = 0; + action->shareable = will_share; + + desc->depth = 0; + desc->status |= IRQ_GUEST; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->handler->startup(irq); + + /* Attempt to bind the interrupt target to the correct CPU. 
*/ + if ( desc->handler->set_affinity != NULL ) + desc->handler->set_affinity( + irq, apicid_to_phys_cpu_present(p->processor)); + } + else if ( !will_share || !action->shareable ) + { + DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n", + irq); + rc = -EBUSY; + goto out; + } + + if ( action->nr_guests == IRQ_MAX_GUESTS ) + { + DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq); + rc = -EBUSY; + goto out; + } + + action->guest[action->nr_guests++] = p; + + out: + spin_unlock_irqrestore(&desc->lock, flags); + return rc; +} + +int pirq_guest_unbind(struct task_struct *p, int irq) +{ + unsigned long flags; + irq_desc_t *desc = &irq_desc[irq]; + irq_guest_action_t *action; + int i; + + spin_lock_irqsave(&desc->lock, flags); + + action = (irq_guest_action_t *)desc->action; + + if ( test_and_clear_bit(irq, &p->pirq_mask) && + (--action->in_flight == 0) ) + desc->handler->end(irq); + + if ( action->nr_guests == 1 ) + { + desc->action = NULL; + kfree(action); + desc->status |= IRQ_DISABLED; + desc->status &= ~IRQ_GUEST; + desc->handler->shutdown(irq); + } + else + { + i = 0; + while ( action->guest[i] != p ) + i++; + memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); + action->nr_guests--; + } + + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c new file mode 100644 index 0000000000..57c99aba62 --- /dev/null +++ b/xen/arch/x86/mm.c @@ -0,0 +1,412 @@ +/****************************************************************************** + * arch/i386/mm.c + * + * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void set_pte_phys(unsigned long vaddr, + l1_pgentry_t entry) +{ + l2_pgentry_t *l2ent; + l1_pgentry_t *l1ent; + + l2ent = &idle_pg_table[l2_table_offset(vaddr)]; + l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr); + *l1ent = entry; + + /* It's enough to flush this one mapping. */ + __flush_tlb_one(vaddr); +} + + +void __set_fixmap(enum fixed_addresses idx, + l1_pgentry_t entry) +{ + unsigned long address = __fix_to_virt(idx); + + if ( likely(idx < __end_of_fixed_addresses) ) + set_pte_phys(address, entry); + else + printk("Invalid __set_fixmap\n"); +} + + +static void __init fixrange_init(unsigned long start, + unsigned long end, + l2_pgentry_t *pg_base) +{ + l2_pgentry_t *l2e; + int i; + unsigned long vaddr, page; + + vaddr = start; + i = l2_table_offset(vaddr); + l2e = pg_base + i; + + for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ ) + { + if ( !l2_pgentry_empty(*l2e) ) + continue; + page = (unsigned long)get_free_page(GFP_KERNEL); + clear_page(page); + *l2e = mk_l2_pgentry(__pa(page) | __PAGE_HYPERVISOR); + vaddr += 1 << L2_PAGETABLE_SHIFT; + } +} + +void __init paging_init(void) +{ + unsigned long addr; + void *ioremap_pt; + int i; + + /* Idle page table 1:1 maps the first part of physical memory. 
*/ + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + idle_pg_table[i] = + mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) | + __PAGE_HYPERVISOR | _PAGE_PSE); + + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + addr = FIXADDR_START & ~((1<> L2_PAGETABLE_SHIFT] = + mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR); + + /* Create read-only mapping of MPT for guest-OS use. */ + idle_pg_table[READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] = + idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]; + mk_l2_readonly(idle_pg_table + + (READONLY_MPT_VIRT_START >> L2_PAGETABLE_SHIFT)); + + /* Set up mapping cache for domain pages. */ + mapcache = (unsigned long *)get_free_page(GFP_KERNEL); + clear_page(mapcache); + idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] = + mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR); + + /* Set up linear page table mapping. */ + idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = + mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR); + +} + +void __init zap_low_mappings(void) +{ + int i; + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + idle_pg_table[i] = mk_l2_pgentry(0); + flush_tlb_all_pge(); +} + + +long do_stack_switch(unsigned long ss, unsigned long esp) +{ + int nr = smp_processor_id(); + struct tss_struct *t = &init_tss[nr]; + + /* We need to do this check as we load and use SS on guest's behalf. */ + if ( (ss & 3) == 0 ) + return -EPERM; + + current->thread.guestos_ss = ss; + current->thread.guestos_sp = esp; + t->ss1 = ss; + t->esp1 = esp; + + return 0; +} + + +/* Returns TRUE if given descriptor is valid for GDT or LDT. */ +int check_descriptor(unsigned long a, unsigned long b) +{ + unsigned long base, limit; + + /* A not-present descriptor will always fault, so is safe. */ + if ( !(b & _SEGMENT_P) ) + goto good; + + /* + * We don't allow a DPL of zero. 
There is no legitimate reason for + * specifying DPL==0, and it gets rather dangerous if we also accept call + * gates (consider a call gate pointing at another guestos descriptor with + * DPL 0 -- this would get the OS ring-0 privileges). + */ + if ( (b & _SEGMENT_DPL) == 0 ) + goto bad; + + if ( !(b & _SEGMENT_S) ) + { + /* + * System segment: + * 1. Don't allow interrupt or trap gates as they belong in the IDT. + * 2. Don't allow TSS descriptors or task gates as we don't + * virtualise x86 tasks. + * 3. Don't allow LDT descriptors because they're unnecessary and + * I'm uneasy about allowing an LDT page to contain LDT + * descriptors. In any case, Xen automatically creates the + * required descriptor when reloading the LDT register. + * 4. We allow call gates but they must not jump to a private segment. + */ + + /* Disallow everything but call gates. */ + if ( (b & _SEGMENT_TYPE) != 0xc00 ) + goto bad; + + /* Can't allow far jump to a Xen-private segment. */ + if ( !VALID_CODESEL(a>>16) ) + goto bad; + + /* Reserved bits must be zero. */ + if ( (b & 0xe0) != 0 ) + goto bad; + + /* No base/limit check is needed for a call gate. */ + goto good; + } + + /* Check that base/limit do not overlap Xen-private space. */ + base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16); + limit = (b&0xf0000) | (a&0xffff); + limit++; /* We add one because limit is inclusive. */ + if ( (b & _SEGMENT_G) ) + limit <<= 12; + if ( ((base + limit) <= base) || + ((base + limit) > PAGE_OFFSET) ) + goto bad; + + good: + return 1; + bad: + return 0; +} + + +long set_gdt(struct task_struct *p, + unsigned long *frames, + unsigned int entries) +{ + /* NB. There are 512 8-byte entries per GDT page. */ + int i, nr_pages = (entries + 511) / 512; + unsigned long pfn; + struct desc_struct *vgdt; + + /* Check the new GDT. 
*/ + for ( i = 0; i < nr_pages; i++ ) + { + if ( unlikely(frames[i] >= max_page) || + unlikely(!get_page_and_type(&frame_table[frames[i]], + p, PGT_gdt_page)) ) + goto fail; + } + + /* Copy reserved GDT entries to the new GDT. */ + vgdt = map_domain_mem(frames[0] << PAGE_SHIFT); + memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY, + gdt_table + FIRST_RESERVED_GDT_ENTRY, + NR_RESERVED_GDT_ENTRIES*8); + unmap_domain_mem(vgdt); + + /* Tear down the old GDT. */ + for ( i = 0; i < 16; i++ ) + { + if ( (pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i])) != 0 ) + put_page_and_type(&frame_table[pfn]); + p->mm.perdomain_pt[i] = mk_l1_pgentry(0); + } + + /* Install the new GDT. */ + for ( i = 0; i < nr_pages; i++ ) + p->mm.perdomain_pt[i] = + mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR); + + SET_GDT_ADDRESS(p, GDT_VIRT_START); + SET_GDT_ENTRIES(p, (entries*8)-1); + + return 0; + + fail: + while ( i-- > 0 ) + put_page_and_type(&frame_table[frames[i]]); + return -EINVAL; +} + + +long do_set_gdt(unsigned long *frame_list, unsigned int entries) +{ + int nr_pages = (entries + 511) / 512; + unsigned long frames[16]; + long ret; + + if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) ) + return -EINVAL; + + if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) ) + return -EFAULT; + + if ( (ret = set_gdt(current, frames, entries)) == 0 ) + { + local_flush_tlb(); + __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt)); + } + + return ret; +} + + +long do_update_descriptor( + unsigned long pa, unsigned long word1, unsigned long word2) +{ + unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT; + struct pfn_info *page; + long ret = -EINVAL; + + if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) ) + return -EINVAL; + + page = &frame_table[pfn]; + if ( unlikely(!get_page(page, current)) ) + goto out; + + /* Check if the given frame is in use in an unsafe context. 
*/ + switch ( page->type_and_flags & PGT_type_mask ) + { + case PGT_gdt_page: + /* Disallow updates of Xen-reserved descriptors in the current GDT. */ + if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) && + (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) && + (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) ) + goto out; + if ( unlikely(!get_page_type(page, PGT_gdt_page)) ) + goto out; + break; + case PGT_ldt_page: + if ( unlikely(!get_page_type(page, PGT_ldt_page)) ) + goto out; + break; + default: + if ( unlikely(!get_page_type(page, PGT_writeable_page)) ) + goto out; + break; + } + + /* All is good so make the update. */ + gdt_pent = map_domain_mem(pa); + gdt_pent[0] = word1; + gdt_pent[1] = word2; + unmap_domain_mem(gdt_pent); + + put_page_type(page); + + ret = 0; /* success */ + + out: + put_page(page); + return ret; +} + +#ifdef MEMORY_GUARD + +void *memguard_init(void *heap_start) +{ + l1_pgentry_t *l1; + int i, j; + + /* Round the allocation pointer up to a page boundary. */ + heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) & + PAGE_MASK); + + /* Memory guarding is incompatible with super pages. */ + for ( i = 0; i < (MAX_MONITOR_ADDRESS >> L2_PAGETABLE_SHIFT); i++ ) + { + l1 = (l1_pgentry_t *)heap_start; + heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE); + for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ ) + l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) | + (j << L1_PAGETABLE_SHIFT) | + __PAGE_HYPERVISOR); + idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] = + mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR); + } + + return heap_start; +} + +static void __memguard_change_range(void *p, unsigned long l, int guard) +{ + l1_pgentry_t *l1; + l2_pgentry_t *l2; + unsigned long _p = (unsigned long)p; + unsigned long _l = (unsigned long)l; + + /* Ensure we are dealing with a page-aligned whole number of pages. 
*/ + ASSERT((_p&PAGE_MASK) != 0); + ASSERT((_l&PAGE_MASK) != 0); + ASSERT((_p&~PAGE_MASK) == 0); + ASSERT((_l&~PAGE_MASK) == 0); + + while ( _l != 0 ) + { + l2 = &idle_pg_table[l2_table_offset(_p)]; + l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p); + if ( guard ) + *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT); + else + *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT); + _p += PAGE_SIZE; + _l -= PAGE_SIZE; + } +} + +void memguard_guard_range(void *p, unsigned long l) +{ + __memguard_change_range(p, l, 1); + local_flush_tlb(); +} + +void memguard_unguard_range(void *p, unsigned long l) +{ + __memguard_change_range(p, l, 0); +} + +int memguard_is_guarded(void *p) +{ + l1_pgentry_t *l1; + l2_pgentry_t *l2; + unsigned long _p = (unsigned long)p; + l2 = &idle_pg_table[l2_table_offset(_p)]; + l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p); + return !(l1_pgentry_val(*l1) & _PAGE_PRESENT); +} + +#endif diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c new file mode 100644 index 0000000000..3a77390ba9 --- /dev/null +++ b/xen/arch/x86/mpparse.c @@ -0,0 +1,1381 @@ +/* + * Intel Multiprocessor Specificiation 1.1 and 1.4 + * compliant MP-table parsing routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * + * Fixes + * Erich Boleyn : MP v1.4 and additional changes. + * Alan Cox : Added EBDA scanning + * Ingo Molnar : various cleanups and rewrites + * Maciej W. Rozycki: Bits for default MP configurations + * Paul Diefenbaugh: Added full ACPI support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int numnodes = 1; /* XXX Xen */ + +/* Have we found an MP table */ +int smp_found_config; + +/* + * Various Linux-internal data structures created from the + * MP-table. 
+ */ +int apic_version [MAX_APICS]; +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +int mp_current_pci_id; +int *mp_bus_id_to_type; +int *mp_bus_id_to_node; +int *mp_bus_id_to_local; +int *mp_bus_id_to_pci_bus; +int max_mp_busses; +int max_irq_sources; + +/* I/O APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +struct mpc_config_intsrc *mp_irqs; + +/* MP IRQ source entries */ +int mp_irq_entries; + +int nr_ioapics; + +int pic_mode; +unsigned long mp_lapic_addr; + +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +unsigned int boot_cpu_logical_apicid = -1U; +/* Internal processor count */ +static unsigned int num_processors; + +/* Bitmask of physically existing CPUs */ +unsigned long phys_cpu_present_map; +unsigned long logical_cpu_present_map; + +#ifdef CONFIG_X86_CLUSTERED_APIC +unsigned char esr_disable = 0; +unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE; +unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC; +#endif +unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; + +/* + * Intel MP BIOS table parsing routines: + */ + +#ifndef CONFIG_X86_VISWS_APIC +/* + * Checksum an MP configuration block. 
+ */ + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum = 0; + + while (len--) + sum += *mp++; + + return sum & 0xFF; +} + +/* + * Processor encoding in an MP configuration block + */ + +static char __init *mpc_family(int family,int model) +{ + static char n[32]; + static char *model_defs[]= + { + "80486DX","80486DX", + "80486SX","80486DX/2 or 80487", + "80486SL","80486SX/2", + "Unknown","80486DX/2-WB", + "80486DX/4","80486DX/4-WB" + }; + + switch (family) { + case 0x04: + if (model < 10) + return model_defs[model]; + break; + + case 0x05: + return("Pentium(tm)"); + + case 0x06: + return("Pentium(tm) Pro"); + + case 0x0F: + if (model == 0x00) + return("Pentium 4(tm)"); + if (model == 0x01) + return("Pentium 4(tm)"); + if (model == 0x02) + return("Pentium 4(tm) XEON(tm)"); + if (model == 0x0F) + return("Special controller"); + } + sprintf(n,"Unknown CPU [%d:%d]",family, model); + return n; +} + +/* + * Have to match translation table entries to main table entries by counter + * hence the mpc_record variable .... can't see a less disgusting way of + * doing this .... + */ + +static int mpc_record; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; + +void __init MP_processor_info (struct mpc_config_processor *m) +{ + int ver, quad, logical_apicid; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + logical_apicid = m->mpc_apicid; + if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + quad = translation_table[mpc_record]->trans_quad; + logical_apicid = (quad << 4) + + (m->mpc_apicid ? 
m->mpc_apicid << 1 : 1); + printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver, quad, logical_apicid); + } else { + printk("Processor #%d %s APIC version %d\n", + m->mpc_apicid, + mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver); + } + + if (m->mpc_featureflag&(1<<0)) + Dprintk(" Floating point unit present.\n"); + if (m->mpc_featureflag&(1<<7)) + Dprintk(" Machine Exception supported.\n"); + if (m->mpc_featureflag&(1<<8)) + Dprintk(" 64 bit compare & exchange supported.\n"); + if (m->mpc_featureflag&(1<<9)) + Dprintk(" Internal APIC present.\n"); + if (m->mpc_featureflag&(1<<11)) + Dprintk(" SEP present.\n"); + if (m->mpc_featureflag&(1<<12)) + Dprintk(" MTRR present.\n"); + if (m->mpc_featureflag&(1<<13)) + Dprintk(" PGE present.\n"); + if (m->mpc_featureflag&(1<<14)) + Dprintk(" MCA present.\n"); + if (m->mpc_featureflag&(1<<15)) + Dprintk(" CMOV present.\n"); + if (m->mpc_featureflag&(1<<16)) + Dprintk(" PAT present.\n"); + if (m->mpc_featureflag&(1<<17)) + Dprintk(" PSE present.\n"); + if (m->mpc_featureflag&(1<<18)) + Dprintk(" PSN present.\n"); + if (m->mpc_featureflag&(1<<19)) + Dprintk(" Cache Line Flush Instruction present.\n"); + /* 20 Reserved */ + if (m->mpc_featureflag&(1<<21)) + Dprintk(" Debug Trace and EMON Store present.\n"); + if (m->mpc_featureflag&(1<<22)) + Dprintk(" ACPI Thermal Throttle Registers present.\n"); + if (m->mpc_featureflag&(1<<23)) + Dprintk(" MMX present.\n"); + if (m->mpc_featureflag&(1<<24)) + Dprintk(" FXSR present.\n"); + if (m->mpc_featureflag&(1<<25)) + Dprintk(" XMM present.\n"); + if (m->mpc_featureflag&(1<<26)) + Dprintk(" Willamette New Instructions present.\n"); + if (m->mpc_featureflag&(1<<27)) + Dprintk(" Self Snoop present.\n"); + if (m->mpc_featureflag&(1<<28)) + Dprintk(" HT present.\n"); + if 
(m->mpc_featureflag&(1<<29)) + Dprintk(" Thermal Monitor present.\n"); + /* 30, 31 Reserved */ + + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + Dprintk(" Bootup CPU\n"); + boot_cpu_physical_apicid = m->mpc_apicid; + boot_cpu_logical_apicid = logical_apicid; + } + + if (num_processors >= NR_CPUS){ + printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot " + "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid); + return; + } + num_processors++; + + if (m->mpc_apicid > MAX_APICS) { + printk("Processor #%d INVALID. (Max ID: %d).\n", + m->mpc_apicid, MAX_APICS); + --num_processors; + return; + } + ver = m->mpc_apicver; + + logical_cpu_present_map |= 1 << (num_processors-1); + phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid); + + /* + * Validate version + */ + if (ver == 0x0) { + printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; + raw_phys_apicid[num_processors - 1] = m->mpc_apicid; +} + +static void __init MP_bus_info (struct mpc_config_bus *m) +{ + char str[7]; + int quad; + + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; + + if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + quad = translation_table[mpc_record]->trans_quad; + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local; + quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid; + printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad); + } else { + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + } + + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + 
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; + } else { + printk("Unknown bustype %s - ignoring\n", str); + } +} + +static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +{ + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk("I/O APIC #%d Version %d at 0x%X.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + if (nr_ioapics >= MAX_IO_APICS) { + printk("Max # of I/O APICs (%d) exceeded (found %d).\n", + MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); + } + if (!m->mpc_apicaddr) { + printk(KERN_ERR "WARNING: bogus zero I/O APIC address" + " found in MP table, skipping!\n"); + return; + } + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; +} + +static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +{ + mp_irqs [mp_irq_entries] = *m; + Dprintk("Int: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + if (++mp_irq_entries == max_irq_sources) + panic("Max # of irq sources exceeded!!\n"); +} + +static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) +{ + Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumptions is false. + * Until then we do not have to add baggage. 
+ */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); +} + +static void __init MP_translation_info (struct mpc_config_translation *m) +{ + printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); + + if (mpc_record >= MAX_MPC_ENTRY) + printk("MAX_MPC_ENTRY exceeded!\n"); + else + translation_table[mpc_record] = m; /* stash this for later */ + if (m->trans_quad+1 > numnodes) + numnodes = m->trans_quad+1; +} + +/* + * Read/parse the MPC oem tables + */ + +static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ + unsigned short oemsize) +{ + int count = sizeof (*oemtable); /* the header size */ + unsigned char *oemptr = ((unsigned char *)oemtable)+count; + + printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable); + if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) + { + printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n", + oemtable->oem_signature[0], + oemtable->oem_signature[1], + oemtable->oem_signature[2], + oemtable->oem_signature[3]); + return; + } + if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) + { + printk("SMP oem mptable: checksum error!\n"); + return; + } + while (count < oemtable->oem_length) { + switch (*oemptr) { + case MP_TRANSLATION: + { + struct mpc_config_translation *m= + (struct mpc_config_translation *)oemptr; + MP_translation_info(m); + oemptr += sizeof(*m); + count += sizeof(*m); + ++mpc_record; + break; + } + default: + { + printk("Unrecognised OEM table entry type! 
- %d\n", (int) *oemptr); + return; + } + } + } +} + +/* + * Read/parse the MPC + */ + +static int __init smp_read_mpc(struct mp_config_table *mpc) +{ + char oem[16], prod[14]; + int count=sizeof(*mpc); + unsigned char *mpt=((unsigned char *)mpc)+count; + int num_bus = 0; + int num_irq = 0; + unsigned char *bus_data; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { + panic("SMP mptable: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 0; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { + panic("SMP mptable: checksum error!\n"); + return 0; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { + printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", + mpc->mpc_spec); + return 0; + } + if (!mpc->mpc_lapic) { + printk(KERN_ERR "SMP mptable: null local APIC address!\n"); + return 0; + } + memcpy(oem,mpc->mpc_oem,8); + oem[8]=0; + printk("OEM ID: %s ",oem); + + memcpy(prod,mpc->mpc_productid,12); + prod[12]=0; + printk("Product ID: %s ",prod); + + detect_clustered_apic(oem, prod); + + printk("APIC at: 0x%X\n",mpc->mpc_lapic); + + /* + * Save the local APIC address (it might be non-default) -- but only + * if we're not using ACPI. + */ + if (!acpi_lapic) + mp_lapic_addr = mpc->mpc_lapic; + + if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) { + /* We need to process the oem mpc tables to tell us which quad things are in ... 
*/ + mpc_record = 0; + smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize); + mpc_record = 0; + } + + /* Pre-scan to determine the number of bus and + * interrupts records we have + */ + while (count < mpc->mpc_length) { + switch (*mpt) { + case MP_PROCESSOR: + mpt += sizeof(struct mpc_config_processor); + count += sizeof(struct mpc_config_processor); + break; + case MP_BUS: + ++num_bus; + mpt += sizeof(struct mpc_config_bus); + count += sizeof(struct mpc_config_bus); + break; + case MP_INTSRC: + ++num_irq; + mpt += sizeof(struct mpc_config_intsrc); + count += sizeof(struct mpc_config_intsrc); + break; + case MP_IOAPIC: + mpt += sizeof(struct mpc_config_ioapic); + count += sizeof(struct mpc_config_ioapic); + break; + case MP_LINTSRC: + mpt += sizeof(struct mpc_config_lintsrc); + count += sizeof(struct mpc_config_lintsrc); + break; + default: + count = mpc->mpc_length; + break; + } + } + /* + * Paranoia: Allocate one extra of both the number of busses and number + * of irqs, and make sure that we have at least 4 interrupts per PCI + * slot. But some machines do not report very many busses, so we need + * to fall back on the older defaults. 
+ */ + ++num_bus; + max_mp_busses = max(num_bus, MAX_MP_BUSSES); + if (num_irq < (4 * max_mp_busses)) + num_irq = 4 * num_bus; /* 4 intr/PCI slot */ + ++num_irq; + max_irq_sources = max(num_irq, MAX_IRQ_SOURCES); + + count = (max_mp_busses * sizeof(int)) * 4; + count += (max_irq_sources * sizeof(struct mpc_config_intsrc)); + bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count)); + if (!bus_data) { + printk(KERN_ERR "SMP mptable: out of memory!\n"); + return 0; + } + mp_bus_id_to_type = (int *)&bus_data[0]; + mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))]; + mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2]; + mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3]; + mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4]; + memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int)); + + /* + * Now process the configuration blocks. + */ + count = sizeof(*mpc); + mpt = ((unsigned char *)mpc)+count; + while (count < mpc->mpc_length) { + switch(*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + /* ACPI may have already provided this data */ + if (!acpi_lapic) + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + default: + { + count = 
mpc->mpc_length; + break; + } + } + ++mpc_record; + } + + if (clustered_apic_mode){ + phys_cpu_present_map = logical_cpu_present_map; + } + + + printk("Enabling APIC mode: "); + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + printk("Clustered Logical. "); + else if(clustered_apic_mode == CLUSTERED_APIC_XAPIC) + printk("Physical. "); + else + printk("Flat. "); + printk("Using %d I/O APICs\n",nr_ioapics); + + if (!num_processors) + printk(KERN_ERR "SMP mptable: no processors registered!\n"); + return num_processors; +} + +static int __init ELCR_trigger(unsigned int irq) +{ + unsigned int port; + + port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; +} + +static void __init construct_default_ioirq_mptable(int mpc_default_type) +{ + struct mpc_config_intsrc intsrc; + int i; + int ELCR_fallback = 0; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_srcbus = 0; + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + + intsrc.mpc_irqtype = mp_INT; + + /* + * If true, we have an ISA/PCI system with no IRQ entries + * in the MP table. To prevent the PCI interrupts from being set up + * incorrectly, we try to use the ELCR. The sanity check to see if + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can + * never be level sensitive, so we simply see if the ELCR agrees. + * If it does, we assume it's valid. + */ + if (mpc_default_type == 5) { + printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) + printk("ELCR contains invalid data... 
not using ELCR\n"); + else { + printk("Using ELCR to identify PCI interrupts\n"); + ELCR_fallback = 1; + } + } + + for (i = 0; i < 16; i++) { + switch (mpc_default_type) { + case 2: + if (i == 0 || i == 13) + continue; /* IRQ0 & IRQ13 not connected */ + /* fall through */ + default: + if (i == 2) + continue; /* IRQ2 is never connected */ + } + + if (ELCR_fallback) { + /* + * If the ELCR indicates a level-sensitive interrupt, we + * copy that information over to the MP table in the + * irqflag field (level sensitive, active high polarity). + */ + if (ELCR_trigger(i)) + intsrc.mpc_irqflag = 13; + else + intsrc.mpc_irqflag = 0; + } + + intsrc.mpc_srcbusirq = i; + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + MP_intsrc_info(&intsrc); + } + + intsrc.mpc_irqtype = mp_ExtINT; + intsrc.mpc_srcbusirq = 0; + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + MP_intsrc_info(&intsrc); +} + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_bus bus; + struct mpc_config_ioapic ioapic; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + struct { + int mp_bus_id_to_type[MAX_MP_BUSSES]; + int mp_bus_id_to_node[MAX_MP_BUSSES]; + int mp_bus_id_to_local[MAX_MP_BUSSES]; + int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; + struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + } *bus_data; + + bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(sizeof(*bus_data))); + if (!bus_data) + panic("SMP mptable: out of memory!\n"); + mp_bus_id_to_type = bus_data->mp_bus_id_to_type; + mp_bus_id_to_node = bus_data->mp_bus_id_to_node; + mp_bus_id_to_local = bus_data->mp_bus_id_to_local; + mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus; + mp_irqs = bus_data->mp_irqs; + for (i = 0; i < MAX_MP_BUSSES; ++i) + mp_bus_id_to_pci_bus[i] = -1; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. 
+ */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | + boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + switch (mpc_default_type) { + default: + printk("???\nUnknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { + bus.mpc_busid = 1; + memcpy(bus.mpc_bustype, "PCI ", 6); + MP_bus_info(&bus); + } + + ioapic.mpc_type = MP_IOAPIC; + ioapic.mpc_apicid = 2; + ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + ioapic.mpc_flags = MPC_APIC_USABLE; + ioapic.mpc_apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); + + /* + * We set up most of the low 16 IO-APIC pins according to MPS rules. + */ + construct_default_ioirq_mptable(mpc_default_type); + + lintsrc.mpc_type = MP_LINTSRC; + lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_srcbusid = 0; + lintsrc.mpc_srcbusirq = 0; + lintsrc.mpc_destapic = MP_APIC_ALL; + for (i = 0; i < 2; i++) { + lintsrc.mpc_irqtype = linttypes[i]; + lintsrc.mpc_destapiclint = i; + MP_lintsrc_info(&lintsrc); + } +} + +static struct intel_mp_floating *mpf_found; + +/* + * Scan the memory blocks for an SMP configuration block. 
+ */ +void __init get_smp_config (void) +{ + struct intel_mp_floating *mpf = mpf_found; + + /* + * ACPI may be used to obtain the entire SMP configuration or just to + * enumerate/configure processors (CONFIG_ACPI_HT_ONLY). Note that + * ACPI supports both logical (e.g. Hyper-Threading) and physical + * processors, where MPS only supports physical. + */ + if (acpi_lapic && acpi_ioapic) { + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); + return; + } + else if (acpi_lapic) + printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); + + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + if (mpf->mpf_feature2 & (1<<7)) { + printk(" IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(" Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } + + /* + * Now see if we need to read further. + */ + if (mpf->mpf_feature1 != 0) { + + printk("Default MP configuration #%d\n", mpf->mpf_feature1); + construct_default_ISA_mptable(mpf->mpf_feature1); + + } else if (mpf->mpf_physptr) { + + /* + * Read the physical hardware table. Anything here will + * override the defaults. + */ + if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + smp_found_config = 0; + printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); + return; + } + /* + * If there are no explicit MP IRQ entries, then we are + * broken. We set up most of the low 16 IO-APIC pins to + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { + struct mpc_config_bus bus; + + printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + memcpy(bus.mpc_bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); + } + + } else + BUG(); + + printk("Processors: %d\n", num_processors); + /* + * Only use the first configuration found. 
+ */ +} + +static int __init smp_scan_config (unsigned long base, unsigned long length) +{ + unsigned long *bp = phys_to_virt(base); + struct intel_mp_floating *mpf; + + Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + if (sizeof(*mpf) != 16) + printk("Error: MPF size\n"); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4)) ) { + + smp_found_config = 1; + printk("found SMP MP-table at %08lx\n", + virt_to_phys(mpf)); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); + if (mpf->mpf_physptr) + reserve_bootmem((unsigned long)mpf->mpf_physptr, PAGE_SIZE); + mpf_found = mpf; + return 1; + } + bp += 4; + length -= 16; + } + return 0; +} + +void __init find_intel_smp (void) +{ + unsigned int address; + + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... + * + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0,0x400) || + smp_scan_config(639*0x400,0x400) || + smp_scan_config(0xF0000,0x10000)) + return; + /* + * If it is an SMP machine we should know now, unless the + * configuration is in an EISA/MCA bus machine with an + * extended bios data area. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. + * + * NOTE! There were Linux loaders that will corrupt the EBDA + * area, and as such this kind of SMP config may be less + * trustworthy, simply because the SMP table may have been + * stomped on during early boot. Thankfully the bootloaders + * now honour the EBDA. 
+ */ + + address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + smp_scan_config(address, 0x1000); +} + +#else + +/* + * The Visual Workstation is Intel MP compliant in the hardware + * sense, but it doesn't have a BIOS(-configuration table). + * No problem for Linux. + */ +void __init find_visws_smp(void) +{ + smp_found_config = 1; + + phys_cpu_present_map |= 2; /* or in id 1 */ + apic_version[1] |= 0x10; /* integrated APIC */ + apic_version[0] |= 0x10; + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; +} + +#endif + +/* + * - Intel MP Configuration Table + * - or SGI Visual Workstation configuration + */ +void __init find_smp_config (void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + find_intel_smp(); +#endif +#ifdef CONFIG_VISWS + find_visws_smp(); +#endif +} + + +/* -------------------------------------------------------------------------- + ACPI-based MP Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_BOOT + +void __init mp_register_lapic_address ( + u64 address) +{ + mp_lapic_addr = (unsigned long) address; + + set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); + + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + + Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); +} + + +void __init mp_register_lapic ( + u8 id, + u8 enabled) +{ + struct mpc_config_processor processor; + int boot_cpu = 0; + + if (id >= MAX_APICS) { + printk(KERN_WARNING "Processor #%d invalid (max %d)\n", + id, MAX_APICS); + return; + } + + if (id == boot_cpu_physical_apicid) + boot_cpu = 1; + + processor.mpc_type = MP_PROCESSOR; + processor.mpc_apicid = id; + + /* + * mp_register_lapic_address() which is called before the + * current function does the fixmap of FIX_APIC_BASE. + * Read in the correct APIC version from there + */ + processor.mpc_apicver = apic_read(APIC_LVR); + + processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); + processor.mpc_cpuflag |= (boot_cpu ? 
CPU_BOOTPROCESSOR : 0); + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + + MP_processor_info(&processor); +} + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) + +#define MP_ISA_BUS 0 +#define MP_MAX_IOAPIC_PIN 127 + +struct mp_ioapic_routing { + int apic_id; + int irq_start; + int irq_end; + u32 pin_programmed[4]; +} mp_ioapic_routing[MAX_IO_APICS]; + + +static int __init mp_find_ioapic ( + int irq) +{ + int i = 0; + + /* Find the IOAPIC that manages this IRQ. */ + for (i = 0; i < nr_ioapics; i++) { + if ((irq >= mp_ioapic_routing[i].irq_start) + && (irq <= mp_ioapic_routing[i].irq_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for IRQ %d\n", irq); + + return -1; +} + + +void __init mp_register_ioapic ( + u8 id, + u32 address, + u32 irq_base) +{ + int idx = 0; + + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in MADT table, skipping!\n"); + return; + } + + idx = nr_ioapics++; + + mp_ioapics[idx].mpc_type = MP_IOAPIC; + mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mpc_apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); + + /* + * Build basic IRQ lookup table to facilitate irq->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI IRQs). 
+ */ + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; + mp_ioapic_routing[idx].irq_start = irq_base; + mp_ioapic_routing[idx].irq_end = irq_base + + io_apic_get_redir_entries(idx); + + printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " + "IRQ %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].irq_start, + mp_ioapic_routing[idx].irq_end); + + return; +} + + +void __init mp_override_legacy_irq ( + u8 bus_irq, + u8 polarity, + u8 trigger, + u32 global_irq) +{ + struct mpc_config_intsrc intsrc; + int i = 0; + int found = 0; + int ioapic = -1; + int pin = -1; + + /* + * Convert 'global_irq' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(global_irq); + if (ioapic < 0) + return; + pin = global_irq - mp_ioapic_routing[ioapic].irq_start; + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (global_irq == 2) && (trigger == 3)) + trigger = 1; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqtype = mp_INT; + intsrc.mpc_irqflag = (trigger << 2) | polarity; + intsrc.mpc_srcbus = MP_ISA_BUS; + intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ + intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + intsrc.mpc_dstirq = pin; /* INTIN# */ + + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", + intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, + (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); + + /* + * If an existing [IOAPIC.PIN -> IRQ] routing entry exists we override it. + * Otherwise create a new entry (e.g. global_irq == 2). 
+ */ + for (i = 0; i < mp_irq_entries; i++) { + if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic) + && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) { + mp_irqs[i] = intsrc; + found = 1; + break; + } + } + if (!found) { + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); + } + + return; +} + + +void __init mp_config_acpi_legacy_irqs (void) +{ + int i = 0; + int ioapic = -1; + + /* + * Initialize mp_irqs for IRQ configuration. + */ + unsigned char *bus_data; + int count; + + count = (MAX_MP_BUSSES * sizeof(int)) * 4; + count += (MAX_IRQ_SOURCES * sizeof(int)) * 4; + bus_data = (void *)__get_free_pages(GFP_KERNEL, get_order(count)); + if (!bus_data) { + panic("Fatal: can't allocate bus memory for ACPI legacy IRQ!"); + } + mp_bus_id_to_type = (int *)&bus_data[0]; + mp_bus_id_to_node = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int))]; + mp_bus_id_to_local = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 2]; + mp_bus_id_to_pci_bus = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 3]; + mp_irqs = (struct mpc_config_intsrc *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 4]; + for (i = 0; i < MAX_MP_BUSSES; ++i) + mp_bus_id_to_pci_bus[i] = -1; + + /* + * Fabricate the legacy ISA bus (bus #31). + */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). + */ + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; + + /* + * Use the default configuration for the IRQs 0-15. These may be + * overriden by (MADT) interrupt source override entries. + */ + for (i = 0; i < 16; i++) { + + if (i == 2) continue; /* Don't connect IRQ2 */ + + mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ + mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; + mp_irqs[mp_irq_entries].mpc_irqtype = i ? 
mp_INT : mp_ExtINT; /* 8259A to #0 */ + mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */ + mp_irqs[mp_irq_entries].mpc_dstirq = i; + + Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " + "%d-%d\n", + mp_irqs[mp_irq_entries].mpc_irqtype, + mp_irqs[mp_irq_entries].mpc_irqflag & 3, + (mp_irqs[mp_irq_entries].mpc_irqflag >> 2) & 3, + mp_irqs[mp_irq_entries].mpc_srcbus, + mp_irqs[mp_irq_entries].mpc_srcbusirq, + mp_irqs[mp_irq_entries].mpc_dstapic, + mp_irqs[mp_irq_entries].mpc_dstirq); + + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); + } +} + +/*extern FADT_DESCRIPTOR acpi_fadt;*/ + +void __init mp_config_ioapic_for_sci(int irq) +{ + int ioapic; + int ioapic_pin; + struct acpi_table_madt* madt; + struct acpi_table_int_src_ovr *entry = NULL; + acpi_interrupt_flags flags; + void *madt_end; + acpi_status status; + + /* + * Ensure that if there is an interrupt source override entry + * for the ACPI SCI, we leave it as is. Unfortunately this involves + * walking the MADT again. + */ + status = acpi_get_firmware_table("APIC", 1, ACPI_LOGICAL_ADDRESSING, + (struct acpi_table_header **) &madt); + if (ACPI_SUCCESS(status)) { + madt_end = (void *) (unsigned long)madt + madt->header.length; + + entry = (struct acpi_table_int_src_ovr *) + ((unsigned long) madt + sizeof(struct acpi_table_madt)); + + while ((void *) entry < madt_end) { + if (entry->header.type == ACPI_MADT_INT_SRC_OVR && + acpi_fadt.sci_int == entry->bus_irq) + goto found; + + entry = (struct acpi_table_int_src_ovr *) + ((unsigned long) entry + entry->header.length); + } + } + /* + * Although the ACPI spec says that the SCI should be level/low + * don't reprogram it unless there is an explicit MADT OVR entry + * instructing us to do so -- otherwise we break Tyan boards which + * have the SCI wired edge/high but no MADT OVR. + */ + return; + +found: + /* + * See the note at the end of ACPI 2.0b section + * 5.2.10.8 for what this is about. 
+ */ + flags = entry->flags; + acpi_fadt.sci_int = entry->global_irq; + irq = entry->global_irq; + + ioapic = mp_find_ioapic(irq); + + ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; + + /* + * MPS INTI flags: + * trigger: 0=default, 1=edge, 3=level + * polarity: 0=default, 1=high, 3=low + * Per ACPI spec, default for SCI means level/low. + */ + io_apic_set_pci_routing(ioapic, ioapic_pin, irq, + (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1)); +} + + +#ifdef CONFIG_ACPI_PCI + +void __init mp_parse_prt (void) +{ + struct list_head *node = NULL; + struct acpi_prt_entry *entry = NULL; + int ioapic = -1; + int ioapic_pin = 0; + int irq = 0; + int idx, bit = 0; + int edge_level = 0; + int active_high_low = 0; + + /* + * Parsing through the PCI Interrupt Routing Table (PRT) and program + * routing for all entries. + */ + list_for_each(node, &acpi_prt.entries) { + entry = list_entry(node, struct acpi_prt_entry, node); + + /* Need to get irq for dynamic entry */ + if (entry->link.handle) { + irq = acpi_pci_link_get_irq(entry->link.handle, entry->link.index, &edge_level, &active_high_low); + if (!irq) + continue; + } + else { + /* Hardwired IRQ. Assume PCI standard settings */ + irq = entry->link.index; + edge_level = 1; + active_high_low = 1; + } + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_fadt.sci_int == irq) { + entry->irq = irq; /*we still need to set entry's irq*/ + continue; + } + + ioapic = mp_find_ioapic(irq); + if (ioapic < 0) + continue; + ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; + + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->irq mappings (but unique PCI devices); + * we only only program the IOAPIC on the first. + */ + bit = ioapic_pin % 32; + idx = (ioapic_pin < 32) ? 
0 : (ioapic_pin / 32); + if (idx > 3) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); + continue; + } + if ((1<irq = irq; + continue; + } + + mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<irq = irq; + + printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n", + entry->id.segment, entry->id.bus, + entry->id.device, ('A' + entry->pin), + mp_ioapic_routing[ioapic].apic_id, ioapic_pin, + entry->irq); + } + + print_IO_APIC(); + + return; +} + +#endif /*CONFIG_ACPI_PCI*/ + +#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ + +#endif /*CONFIG_ACPI*/ diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c new file mode 100644 index 0000000000..8422367492 --- /dev/null +++ b/xen/arch/x86/nmi.c @@ -0,0 +1,324 @@ +/* + * linux/arch/i386/nmi.c + * + * NMI watchdog support on APIC systems + * + * Started by Ingo Molnar + * + * Fixes: + * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. + * Mikael Pettersson : Power Management for local APIC NMI watchdog. + * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. 
+ * Keir Fraser : Pentium 4 Hyperthreading support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +unsigned int nmi_watchdog = NMI_NONE; +unsigned int watchdog_on = 0; +static unsigned int nmi_hz = HZ; +unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +extern void show_registers(struct pt_regs *regs); + +extern int logical_proc_id[]; + +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +#define MSR_P4_MISC_ENABLE 0x1A0 +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) +#define MSR_P4_PERFCTR0 0x300 +#define MSR_P4_CCCR0 0x360 +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS0 (1<<3) +#define P4_ESCR_USR0 (1<<2) +#define P4_ESCR_OS1 (1<<1) +#define P4_ESCR_USR1 (1<<0) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +/* + * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter + * CRU_ESCR0 (with any non-null event selector) through a complemented + * max threshold. [IA32-Vol3, Section 14.9.9] + */ +#define MSR_P4_IQ_COUNTER0 0x30C +#define MSR_P4_IQ_COUNTER1 0x30D +#define MSR_P4_IQ_CCCR0 0x36C +#define MSR_P4_IQ_CCCR1 0x36D +#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 
4 */ +#define P4_NMI_CRU_ESCR0 \ + (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \ + P4_ESCR_OS1|P4_ESCR_USR1) +#define P4_NMI_IQ_CCCR0 \ + (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ + P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +#define P4_NMI_IQ_CCCR1 \ + (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ + P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) + +int __init check_nmi_watchdog (void) +{ + unsigned int prev_nmi_count[NR_CPUS]; + int j, cpu; + + if ( !nmi_watchdog ) + return 0; + + printk("Testing NMI watchdog --- "); + + for ( j = 0; j < smp_num_cpus; j++ ) + { + cpu = cpu_logical_map(j); + prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count; + } + sti(); + mdelay((10*1000)/nmi_hz); /* wait 10 ticks */ + + for ( j = 0; j < smp_num_cpus; j++ ) + { + cpu = cpu_logical_map(j); + if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 ) + printk("CPU#%d stuck. ", cpu); + else + printk("CPU#%d okay. ", cpu); + } + + printk("\n"); + + /* now that we know it works we can reduce NMI frequency to + something more reasonable; makes a difference in some configs */ + if ( nmi_watchdog == NMI_LOCAL_APIC ) + nmi_hz = 1; + + return 0; +} + +static inline void nmi_pm_init(void) { } +#define __pminit __init + +/* + * Activate the NMI watchdog via the local APIC. + * Original code written by Keith Owens. 
+ */ + +static void __pminit clear_msr_range(unsigned int base, unsigned int n) +{ + unsigned int i; + for ( i = 0; i < n; i++ ) + wrmsr(base+i, 0, 0); +} + +static void __pminit setup_k7_watchdog(void) +{ + unsigned int evntsel; + + nmi_perfctr_msr = MSR_K7_PERFCTR0; + + clear_msr_range(MSR_K7_EVNTSEL0, 4); + clear_msr_range(MSR_K7_PERFCTR0, 4); + + evntsel = K7_EVNTSEL_INT + | K7_EVNTSEL_OS + | K7_EVNTSEL_USR + | K7_NMI_EVENT; + + wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); + Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); + wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); +} + +static void __pminit setup_p6_watchdog(void) +{ + unsigned int evntsel; + + nmi_perfctr_msr = MSR_P6_PERFCTR0; + + clear_msr_range(MSR_P6_EVNTSEL0, 2); + clear_msr_range(MSR_P6_PERFCTR0, 2); + + evntsel = P6_EVNTSEL_INT + | P6_EVNTSEL_OS + | P6_EVNTSEL_USR + | P6_NMI_EVENT; + + wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); + Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); + wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); +} + +static int __pminit setup_p4_watchdog(void) +{ + unsigned int misc_enable, dummy; + + rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + + nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; + + if ( logical_proc_id[smp_processor_id()] == 0 ) + { + if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) + clear_msr_range(0x3F1, 2); + /* MSR 0x3F0 seems to have a default value of 0xFC00, but current + docs doesn't fully define it, so leave it alone for now. 
*/ + clear_msr_range(0x3A0, 31); + clear_msr_range(0x3C0, 6); + clear_msr_range(0x3C8, 6); + clear_msr_range(0x3E0, 2); + clear_msr_range(MSR_P4_CCCR0, 18); + clear_msr_range(MSR_P4_PERFCTR0, 18); + + wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); + Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); + wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + apic_write(APIC_LVTPC, APIC_DM_NMI); + wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + } + else if ( logical_proc_id[smp_processor_id()] == 1 ) + { + wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0); + Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); + wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1); + apic_write(APIC_LVTPC, APIC_DM_NMI); + wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0); + } + else + { + return 0; + } + + return 1; +} + +void __pminit setup_apic_nmi_watchdog(void) +{ + if (!nmi_watchdog) + return; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) + return; + setup_k7_watchdog(); + break; + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + setup_p6_watchdog(); + break; + case 15: + if (!setup_p4_watchdog()) + return; + break; + default: + return; + } + break; + default: + return; + } + nmi_pm_init(); +} + + +static unsigned int +last_irq_sums [NR_CPUS], + alert_counter [NR_CPUS]; + +void touch_nmi_watchdog (void) +{ + int i; + for (i = 0; i < smp_num_cpus; i++) + alert_counter[i] = 0; +} + +void nmi_watchdog_tick (struct pt_regs * regs) +{ + extern spinlock_t console_lock; + extern void die(const char * str, struct pt_regs * regs, long err); + + int sum, cpu = smp_processor_id(); + + sum = apic_timer_irqs[cpu]; + + if ( (last_irq_sums[cpu] == sum) && watchdog_on ) + { + /* + * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds) + * before doing the oops ... 
+ */ + alert_counter[cpu]++; + if (alert_counter[cpu] == 5*nmi_hz) { + console_lock = SPIN_LOCK_UNLOCKED; + die("NMI Watchdog detected LOCKUP on CPU", regs, cpu); + } + } + else + { + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } + + if ( nmi_perfctr_msr ) + { + if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 ) + { + if ( logical_proc_id[cpu] == 0 ) + { + wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + apic_write(APIC_LVTPC, APIC_DM_NMI); + wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + } + else + { + wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0); + apic_write(APIC_LVTPC, APIC_DM_NMI); + wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1); + } + } + else + { + wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + } + } +} diff --git a/xen/arch/x86/pci-irq.c b/xen/arch/x86/pci-irq.c new file mode 100644 index 0000000000..0a6b84c9ba --- /dev/null +++ b/xen/arch/x86/pci-irq.c @@ -0,0 +1,1092 @@ +/* + * Low-Level PCI Support for PC -- Routing of Interrupts + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "pci-x86.h" + +#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) +#define PIRQ_VERSION 0x0100 + +int broken_hp_bios_irq9; + +static struct irq_routing_table *pirq_table; + +/* + * Never use: 0, 1, 2 (timer, keyboard, and cascade) + * Avoid using: 13, 14 and 15 (FP error and IDE). 
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) + */ +unsigned int pcibios_irq_mask = 0xfff8; + +static int pirq_penalty[16] = { + 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, + 0, 0, 0, 0, 1000, 100000, 100000, 100000 +}; + +struct irq_router { + char *name; + u16 vendor, device; + int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); +}; + +struct irq_router_handler { + u16 vendor; + int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); +}; + +/* + * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. + */ + +static struct irq_routing_table * __init pirq_find_routing_table(void) +{ + u8 *addr; + struct irq_routing_table *rt; + int i; + u8 sum; + + for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + rt = (struct irq_routing_table *) addr; + if (rt->signature != PIRQ_SIGNATURE || + rt->version != PIRQ_VERSION || + rt->size % 16 || + rt->size < sizeof(struct irq_routing_table)) + continue; + sum = 0; + for(i=0; isize; i++) + sum += addr[i]; + if (!sum) { + DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt); + return rt; + } + } + return NULL; +} + +/* + * If we have a IRQ routing table, use it to search for peer host + * bridges. It's a gross hack, but since there are no other known + * ways how to get a list of buses, we have to go this way. 
+ */ + +static void __init pirq_peer_trick(void) +{ + struct irq_routing_table *rt = pirq_table; + u8 busmap[256]; + int i; + struct irq_info *e; + + memset(busmap, 0, sizeof(busmap)); + for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + e = &rt->slots[i]; +#ifdef DEBUG + { + int j; + DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); + for(j=0; j<4; j++) + DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); + DBG("\n"); + } +#endif + busmap[e->bus] = 1; + } + for(i=1; i<256; i++) + /* + * It might be a secondary bus, but in this case its parent is already + * known (ascending bus order) and therefore pci_scan_bus returns immediately. + */ + if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL)) + printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + pcibios_last_bus = -1; +} + +/* + * Code for querying and setting of IRQ routes on various interrupt routers. + */ + +void eisa_set_level_irq(unsigned int irq) +{ + unsigned char mask = 1 << (irq & 7); + unsigned int port = 0x4d0 + (irq >> 3); + unsigned char val = inb(port); + + if (!(val & mask)) { + DBG(" -> edge"); + outb(val | mask, port); + } +} + +/* + * Common IRQ routing practice: nybbles in config space, + * offset by some magic constant. + */ +static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + return (nr & 1) ? (x >> 4) : (x & 0xf); +} + +static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); + pci_write_config_byte(router, reg, x); +} + +/* + * ALI pirq entries are damn ugly, and completely undocumented. + * This has been figured out from pirq tables, and it's not a pretty + * picture. 
+ */ +static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; + + return irqmap[read_config_nybble(router, 0x48, pirq-1)]; +} + +static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; + unsigned int val = irqmap[irq]; + + if (val) { + write_config_nybble(router, 0x48, pirq-1, val); + return 1; + } + return 0; +} + +/* + * The Intel PIIX4 pirq rules are fairly simple: "pirq" is + * just a pointer to the config space. + */ +static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + + pci_read_config_byte(router, pirq, &x); + return (x < 16) ? x : 0; +} + +static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + pci_write_config_byte(router, pirq, irq); + return 1; +} + +/* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, PIRQD is in the upper instead of lower nibble. + */ +static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); +} + +static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x55, pirq == 4 ? 
5 : pirq, irq); + return 1; +} + +/* + * ITE 8330G pirq rules are nibble-based + * FIXME: pirqmap may be { 1, 0, 3, 2 }, + * 2+3 are both mapped to irq 9 on my system + */ +static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static unsigned char pirqmap[4] = { 1, 0, 2, 3 }; + return read_config_nybble(router,0x43, pirqmap[pirq-1]); +} + +static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static unsigned char pirqmap[4] = { 1, 0, 2, 3 }; + write_config_nybble(router, 0x43, pirqmap[pirq-1], irq); + return 1; +} + +/* + * OPTI: high four bits are nibble pointer.. + * I wonder what the low bits do? + */ +static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0xb8, pirq >> 4); +} + +static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0xb8, pirq >> 4, irq); + return 1; +} + +/* + * Cyrix: nibble offset 0x5C + */ +static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x5C, (pirq-1)^1); +} + +static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x5C, (pirq-1)^1, irq); + return 1; +} + +/* + * PIRQ routing for SiS 85C503 router used in several SiS chipsets. + * We have to deal with the following issues here: + * - vendors have different ideas about the meaning of link values + * - some onboard devices (integrated in the chipset) have special + * links and are thus routed differently (i.e. 
not via PCI INTA-INTD) + * - different revision of the router have a different layout for + * the routing registers, particularly for the onchip devices + * + * For all routing registers the common thing is we have one byte + * per routeable link which is defined as: + * bit 7 IRQ mapping enabled (0) or disabled (1) + * bits [6:4] reserved (sometimes used for onchip devices) + * bits [3:0] IRQ to map to + * allowed: 3-7, 9-12, 14-15 + * reserved: 0, 1, 2, 8, 13 + * + * The config-space registers located at 0x41/0x42/0x43/0x44 are + * always used to route the normal PCI INT A/B/C/D respectively. + * Apparently there are systems implementing PCI routing table using + * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. + * We try our best to handle both link mappings. + * + * Currently (2003-05-21) it appears most SiS chipsets follow the + * definition of routing registers from the SiS-5595 southbridge. + * According to the SiS 5595 datasheets the revision id's of the + * router (ISA-bridge) should be 0x01 or 0xb0. + * + * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1. + * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets. + * They seem to work with the current routing code. However there is + * some concern because of the two USB-OHCI HCs (original SiS 5595 + * had only one). YMMV. + * + * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1: + * + * 0x61: IDEIRQ: + * bits [6:5] must be written 01 + * bit 4 channel-select primary (0), secondary (1) + * + * 0x62: USBIRQ: + * bit 6 OHCI function disabled (0), enabled (1) + * + * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved + * + * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved + * + * We support USBIRQ (in addition to INTA-INTD) and keep the + * IDE, ACPI and DAQ routing untouched as set by the BIOS. + * + * Currently the only reported exception is the new SiS 65x chipset + * which includes the SiS 69x southbridge. 
Here we have the 85C503 + * router revision 0x04 and there are changes in the register layout + * mostly related to the different USB HCs with USB 2.0 support. + * + * Onchip routing for router rev-id 0x04 (try-and-error observation) + * + * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs + * bit 6-4 are probably unused, not like 5595 + */ + +#define PIRQ_SIS_IRQ_MASK 0x0f +#define PIRQ_SIS_IRQ_DISABLE 0x80 +#define PIRQ_SIS_USB_ENABLE 0x40 +#define PIRQ_SIS_DETECT_REGISTER 0x40 + +/* return value: + * -1 on error + * 0 for PCI INTA-INTD + * 0 or enable bit mask to check or set for onchip functions + */ +static inline int pirq_sis5595_onchip(int pirq, int *reg) +{ + int ret = -1; + + *reg = pirq; + switch(pirq) { + case 0x01: + case 0x02: + case 0x03: + case 0x04: + *reg += 0x40; + case 0x41: + case 0x42: + case 0x43: + case 0x44: + ret = 0; + break; + + case 0x62: + ret = PIRQ_SIS_USB_ENABLE; /* documented for 5595 */ + break; + + case 0x61: + case 0x6a: + case 0x7e: + printk(KERN_INFO "SiS pirq: IDE/ACPI/DAQ mapping not implemented: (%u)\n", + (unsigned) pirq); + /* fall thru */ + default: + printk(KERN_INFO "SiS router unknown request: (%u)\n", + (unsigned) pirq); + break; + } + return ret; +} + +/* return value: + * -1 on error + * 0 for PCI INTA-INTD + * 0 or enable bit mask to check or set for onchip functions + */ +static inline int pirq_sis96x_onchip(int pirq, int *reg) +{ + int ret = -1; + + *reg = pirq; + switch(pirq) { + case 0x01: + case 0x02: + case 0x03: + case 0x04: + *reg += 0x40; + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + ret = 0; + break; + + default: + printk(KERN_INFO "SiS router unknown request: (%u)\n", + (unsigned) pirq); + break; + } + return ret; +} + + +static int pirq_sis5595_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + int reg, check; + + check = pirq_sis5595_onchip(pirq, ®); + if (check < 0) + return 0; + + pci_read_config_byte(router, 
reg, &x); + if (check != 0 && !(x & check)) + return 0; + + return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK); +} + +static int pirq_sis96x_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + int reg, check; + + check = pirq_sis96x_onchip(pirq, ®); + if (check < 0) + return 0; + + pci_read_config_byte(router, reg, &x); + if (check != 0 && !(x & check)) + return 0; + + return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK); +} + +static int pirq_sis5595_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + u8 x; + int reg, set; + + set = pirq_sis5595_onchip(pirq, ®); + if (set < 0) + return 0; + + x = (irq & PIRQ_SIS_IRQ_MASK); + if (x == 0) + x = PIRQ_SIS_IRQ_DISABLE; + else + x |= set; + + pci_write_config_byte(router, reg, x); + + return 1; +} + +static int pirq_sis96x_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + u8 x; + int reg, set; + + set = pirq_sis96x_onchip(pirq, ®); + if (set < 0) + return 0; + + x = (irq & PIRQ_SIS_IRQ_MASK); + if (x == 0) + x = PIRQ_SIS_IRQ_DISABLE; + else + x |= set; + + pci_write_config_byte(router, reg, x); + + return 1; +} + + +/* + * VLSI: nibble offset 0x74 - educated guess due to routing table and + * config space of VLSI 82C534 PCI-bridge/router (1004:0102) + * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard + * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6 + * for the busbridge to the docking station. 
+ */ + +static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + if (pirq > 8) { + printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + return 0; + } + return read_config_nybble(router, 0x74, pirq-1); +} + +static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + if (pirq > 8) { + printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + return 0; + } + write_config_nybble(router, 0x74, pirq-1, irq); + return 1; +} + +/* + * ServerWorks: PCI interrupts mapped to system IRQ lines through Index + * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register + * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect + * register is a straight binary coding of desired PIC IRQ (low nibble). + * + * The 'link' value in the PIRQ table is already in the correct format + * for the Index register. There are some special index values: + * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, + * and 0x03 for SMBus. + */ +static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb_p(pirq, 0xc00); + return inb(0xc01) & 0xf; +} + +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + outb_p(pirq, 0xc00); + outb_p(irq, 0xc01); + return 1; +} + +/* Support for AMD756 PCI IRQ Routing + * Jhon H. Caicedo + * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... 
(jhcaiced) + * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) + * The AMD756 pirq rules are nibble-based + * offset 0x56 0-3 PIRQA 4-7 PIRQB + * offset 0x57 0-3 PIRQC 4-7 PIRQD + */ +static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 irq; + irq = 0; + if (pirq <= 4) + { + irq = read_config_nybble(router, 0x56, pirq - 1); + } + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", + dev->vendor, dev->device, pirq, irq); + return irq; +} + +static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", + dev->vendor, dev->device, pirq, irq); + if (pirq <= 4) + { + write_config_nybble(router, 0x56, pirq - 1, irq); + } + return 1; +} + +#ifdef CONFIG_PCI_BIOS + +static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + struct pci_dev *bridge; + int pin = pci_get_interrupt_pin(dev, &bridge); + return pcibios_set_irq_routing(bridge, pin, irq); +} + +#endif + + +static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + /* We must not touch 440GX even if we have tables. 
440GX has + different IRQ routing weirdness */ + if(pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0, NULL) || + pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2, NULL)) + return 0; + switch(device) + { + case PCI_DEVICE_ID_INTEL_82371FB_0: + case PCI_DEVICE_ID_INTEL_82371SB_0: + case PCI_DEVICE_ID_INTEL_82371AB_0: + case PCI_DEVICE_ID_INTEL_82371MX: + case PCI_DEVICE_ID_INTEL_82443MX_0: + case PCI_DEVICE_ID_INTEL_82801AA_0: + case PCI_DEVICE_ID_INTEL_82801AB_0: + case PCI_DEVICE_ID_INTEL_82801BA_0: + case PCI_DEVICE_ID_INTEL_82801BA_10: + case PCI_DEVICE_ID_INTEL_82801CA_0: + case PCI_DEVICE_ID_INTEL_82801CA_12: + case PCI_DEVICE_ID_INTEL_82801DB_0: + case PCI_DEVICE_ID_INTEL_82801E_0: + case PCI_DEVICE_ID_INTEL_82801EB_0: + case PCI_DEVICE_ID_INTEL_ESB_0: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + return 0; +} + +static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + /* FIXME: We should move some of the quirk fixup stuff here */ + switch(device) + { + case PCI_DEVICE_ID_VIA_82C586_0: + case PCI_DEVICE_ID_VIA_82C596: + case PCI_DEVICE_ID_VIA_82C686: + case PCI_DEVICE_ID_VIA_8231: + /* FIXME: add new ones for 8233/5 */ + r->name = "VIA"; + r->get = pirq_via_get; + r->set = pirq_via_set; + return 1; + } + return 0; +} + +static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_VLSI_82C534: + r->name = "VLSI 82C534"; + r->get = pirq_vlsi_get; + r->set = pirq_vlsi_set; + return 1; + } + return 0; +} + + +static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_SERVERWORKS_OSB4: + case PCI_DEVICE_ID_SERVERWORKS_CSB5: + r->name = "ServerWorks"; + r->get = pirq_serverworks_get; + r->set = pirq_serverworks_set; + return 1; + } + return 0; +} + +static __init int 
sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + u8 reg; + u16 devid; + + if (device != PCI_DEVICE_ID_SI_503) + return 0; + + /* + * In case of SiS south bridge, we need to detect the two + * kinds of routing tables we have seen so far (5595 and 96x). + * Since the maintain the same device ID, we need to do poke + * the PCI configuration space to find the router type we are + * dealing with. + */ + + /* + * Factoid: writing bit6 of register 0x40 of the router config space + * will make the SB to show up 0x096x inside the device id. Note, + * we need to restore register 0x40 after the device id poke. + */ + + pci_read_config_byte(router, PIRQ_SIS_DETECT_REGISTER, ®); + pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg | (1 << 6)); + pci_read_config_word(router, PCI_DEVICE_ID, &devid); + pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg); + + if ((devid & 0xfff0) == 0x0960) { + r->name = "SIS96x"; + r->get = pirq_sis96x_get; + r->set = pirq_sis96x_set; + DBG("PCI: Detecting SiS router at %02x:%02x : SiS096x detected\n", + rt->rtr_bus, rt->rtr_devfn); + } else { + r->name = "SIS5595"; + r->get = pirq_sis5595_get; + r->set = pirq_sis5595_set; + DBG("PCI: Detecting SiS router at %02x:%02x : SiS5595 detected\n", + rt->rtr_bus, rt->rtr_devfn); + } + return 1; +} + +static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_CYRIX_5520: + r->name = "NatSemi"; + r->get = pirq_cyrix_get; + r->set = pirq_cyrix_set; + return 1; + } + return 0; +} + +static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_OPTI_82C700: + r->name = "OPTI"; + r->get = pirq_opti_get; + r->set = pirq_opti_set; + return 1; + } + return 0; +} + +static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case 
PCI_DEVICE_ID_ITE_IT8330G_0: + r->name = "ITE"; + r->get = pirq_ite_get; + r->set = pirq_ite_set; + return 1; + } + return 0; +} + +static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_AL_M1533: + r->name = "ALI"; + r->get = pirq_ali_get; + r->set = pirq_ali_set; + return 1; + /* Should add 156x some day */ + } + return 0; +} + +static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_AMD_VIPER_740B: + r->name = "AMD756"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7413: + r->name = "AMD766"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7443: + r->name = "AMD768"; + break; + default: + return 0; + } + r->get = pirq_amd756_get; + r->set = pirq_amd756_set; + return 1; +} + +static __initdata struct irq_router_handler pirq_routers[] = { + { PCI_VENDOR_ID_INTEL, intel_router_probe }, + { PCI_VENDOR_ID_AL, ali_router_probe }, + { PCI_VENDOR_ID_ITE, ite_router_probe }, + { PCI_VENDOR_ID_VIA, via_router_probe }, + { PCI_VENDOR_ID_OPTI, opti_router_probe }, + { PCI_VENDOR_ID_SI, sis_router_probe }, + { PCI_VENDOR_ID_CYRIX, cyrix_router_probe }, + { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, + { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, + { PCI_VENDOR_ID_AMD, amd_router_probe }, + /* Someone with docs needs to add the ATI Radeon IGP */ + { 0, NULL } +}; +static struct irq_router pirq_router; +static struct pci_dev *pirq_router_dev; + +/* + * FIXME: should we have an option to say "generic for + * chipset" ? 
+ */ + +static void __init pirq_find_router(struct irq_router *r) +{ + struct irq_routing_table *rt = pirq_table; + struct irq_router_handler *h; + +#ifdef CONFIG_PCI_BIOS + if (!rt->signature) { + printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n"); + r->set = pirq_bios_set; + r->name = "BIOS"; + return; + } +#endif + + /* Default unless a driver reloads it */ + r->name = "default"; + r->get = NULL; + r->set = NULL; + + DBG("PCI: Attempting to find IRQ router for %04x:%04x\n", + rt->rtr_vendor, rt->rtr_device); + + pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); + if (!pirq_router_dev) { + DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); + return; + } + + for( h = pirq_routers; h->vendor; h++) { + /* First look for a router match */ + if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) + break; + /* Fall back to a device match */ + if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) + break; + } + printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", + pirq_router.name, + pirq_router_dev->vendor, + pirq_router_dev->device, + pirq_router_dev->slot_name); +} + +static struct irq_info *pirq_get_info(struct pci_dev *dev) +{ + struct irq_routing_table *rt = pirq_table; + int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); + struct irq_info *info; + + for (info = rt->slots; entries--; info++) + if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) + return info; + return NULL; +} + +static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +{ +} + +static int pcibios_lookup_irq(struct pci_dev *dev, int assign) +{ + u8 pin; + struct irq_info *info; + int i, pirq, newirq; + int irq = 0; + u32 mask; + struct irq_router *r = &pirq_router; + struct pci_dev *dev2; + char *msg = NULL; + + if (!pirq_table) + return 0; + + /* Find IRQ routing entry */ + 
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (!pin) { + DBG(" -> no interrupt pin\n"); + return 0; + } + pin = pin - 1; + + DBG("IRQ for %s:%d", dev->slot_name, pin); + info = pirq_get_info(dev); + if (!info) { + DBG(" -> not found in routing table\n"); + return 0; + } + pirq = info->irq[pin].link; + mask = info->irq[pin].bitmap; + if (!pirq) { + DBG(" -> not routed\n"); + return 0; + } + DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); + mask &= pcibios_irq_mask; + + /* Work around broken HP Pavilion Notebooks which assign USB to + IRQ 9 even though it is actually wired to IRQ 11 */ + + if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) { + dev->irq = 11; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); + r->set(pirq_router_dev, dev, pirq, 11); + } + + /* + * Find the best IRQ to assign: use the one + * reported by the device if possible. + */ + newirq = dev->irq; + if (!newirq && assign) { + for (i = 0; i < 16; i++) { + if (!(mask & (1 << i))) + continue; + if (pirq_penalty[i] < pirq_penalty[newirq] && + !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) { + free_irq(i, dev); + newirq = i; + } + } + } + DBG(" -> newirq=%d", newirq); + + /* Check if it is hardcoded */ + if ((pirq & 0xf0) == 0xf0) { + irq = pirq & 0xf; + DBG(" -> hardcoded IRQ %d\n", irq); + msg = "Hardcoded"; + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) { + DBG(" -> got IRQ %d\n", irq); + msg = "Found"; + } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { + DBG(" -> assigning IRQ %d", newirq); + if (r->set(pirq_router_dev, dev, pirq, newirq)) { + eisa_set_level_irq(newirq); + DBG(" ... OK\n"); + msg = "Assigned"; + irq = newirq; + } + } + + if (!irq) { + DBG(" ... 
failed\n"); + if (newirq && mask == (1 << newirq)) { + msg = "Guessed"; + irq = newirq; + } else + return 0; + } + printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name); + + /* Update IRQ for all devices with the same pirq value */ + pci_for_each_dev(dev2) { + pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); + if (!pin) + continue; + pin--; + info = pirq_get_info(dev2); + if (!info) + continue; + if (info->irq[pin].link == pirq) { + /* We refuse to override the dev->irq information. Give a warning! */ + if (dev2->irq && dev2->irq != irq) { + printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", + dev2->slot_name, dev2->irq, irq); + continue; + } + dev2->irq = irq; + pirq_penalty[irq]++; + if (dev != dev2) + printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name); + } + } + return 1; +} + +void __init pcibios_irq_init(void) +{ + DBG("PCI: IRQ init\n"); + pirq_table = pirq_find_routing_table(); +#ifdef CONFIG_PCI_BIOS + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + pirq_table = pcibios_get_irq_routing_table(); +#endif + if (pirq_table) { + pirq_peer_trick(); + pirq_find_router(&pirq_router); + if (pirq_table->exclusive_irqs) { + int i; + for (i=0; i<16; i++) + if (!(pirq_table->exclusive_irqs & (1 << i))) + pirq_penalty[i] += 100; + } + /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ + if (io_apic_assign_pci_irqs) + pirq_table = NULL; + } +} + +void __init pcibios_fixup_irqs(void) +{ + struct pci_dev *dev; + u8 pin; + + DBG("PCI: IRQ fixup\n"); + pci_for_each_dev(dev) { + /* + * If the BIOS has set an out of range IRQ number, just ignore it. + * Also keep track of which IRQ's are already in use. 
+ */ + if (dev->irq >= 16) { + DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq); + dev->irq = 0; + } + /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ + if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) + pirq_penalty[dev->irq] = 0; + pirq_penalty[dev->irq]++; + } + + pci_for_each_dev(dev) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); +#ifdef CONFIG_X86_IO_APIC + /* + * Recalculate IRQ numbers if we use the I/O APIC. + */ + if (io_apic_assign_pci_irqs) + { + int irq; + + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + /* + * Busses behind bridges are typically not listed in the MP-table. + * In this case we have to look up the IRQ based on the parent bus, + * parent slot, and pin number. The SMP code detects such bridged + * busses itself so we should get into this branch reliably. + */ + if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev * bridge = dev->bus->self; + + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + if (irq >= 0) + printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); + } + if (irq >= 0) { + printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", + dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); + dev->irq = irq; + } + } + } +#endif + /* + * Still no IRQ? Try to lookup one... + */ + if (pin && !dev->irq) + pcibios_lookup_irq(dev, 0); + } +} + +void pcibios_penalize_isa_irq(int irq) +{ + /* + * If any ISAPnP device reports an IRQ in its list of possible + * IRQ's, we try to avoid assigning it to PCI devices. 
+ */ + pirq_penalty[irq] += 100; +} + +void pcibios_enable_irq(struct pci_dev *dev) +{ + u8 pin; + extern int interrupt_line_quirk; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { + char *msg; + + /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) + return; + + if (io_apic_assign_pci_irqs) + msg = " Probably buggy MP table."; + else if (pci_probe & PCI_BIOS_IRQ_SCAN) + msg = ""; + else + msg = " Please try using pci=biosirq."; + printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", + 'A' + pin - 1, dev->slot_name, msg); + } + /* VIA bridges use interrupt line for apic/pci steering across + the V-Link */ + else if (interrupt_line_quirk) + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + +} diff --git a/xen/arch/x86/pci-pc.c b/xen/arch/x86/pci-pc.c new file mode 100644 index 0000000000..eac8d5bb4f --- /dev/null +++ b/xen/arch/x86/pci-pc.c @@ -0,0 +1,1538 @@ +/* + * Low-Level PCI Support for PC + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/*#include */ +#include +#include +#include + +#include "pci-x86.h" + +extern int numnodes; +#define __KERNEL_CS __HYPERVISOR_CS +#define __KERNEL_DS __HYPERVISOR_DS + +unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; + +int pcibios_last_bus = -1; +struct pci_bus *pci_root_bus = NULL; +struct pci_ops *pci_root_ops = NULL; + +int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; +int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; + +static int pci_using_acpi_prt = 0; + +#ifdef CONFIG_MULTIQUAD +#define BUS2QUAD(global) (mp_bus_id_to_node[global]) +#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) +#define QUADLOCAL2BUS(quad,local) 
(quad_local_to_mp_bus_id[quad][local]) +#else +#define BUS2QUAD(global) (0) +#define BUS2LOCAL(global) (global) +#define QUADLOCAL2BUS(quad,local) (local) +#endif + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ +static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED; + + +/* + * Functions for accessing PCI configuration space with type 1 accesses + */ + +#ifdef CONFIG_PCI_DIRECT + +#ifdef CONFIG_MULTIQUAD +#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ + (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) + +static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */ +{ + unsigned long flags; + + if (bus > 255 || dev > 31 || fn > 7 || reg > 255) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus)); + + switch (len) { + case 1: + *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus)); + break; + case 2: + *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus)); + break; + case 4: + *value = inl_quad(0xCFC, BUS2QUAD(bus)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */ +{ + unsigned long flags; + + if (bus > 255 || dev > 31 || fn > 7 || reg > 255) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus)); + + switch (len) { + case 1: + outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus)); + break; + case 2: + outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus)); + break; + case 4: + outl_quad((u32)value, 0xCFC, BUS2QUAD(bus)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 
data; + + result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + + *value = (u8)data; + + return result; +} + +static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + + result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + + *value = (u16)data; + + return result; +} + +static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + if (!value) + return -EINVAL; + + return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static struct pci_ops pci_direct_mq_conf1 = { + pci_conf1_read_mq_config_byte, + pci_conf1_read_mq_config_word, + pci_conf1_read_mq_config_dword, + pci_conf1_write_mq_config_byte, + pci_conf1_write_mq_config_word, + pci_conf1_write_mq_config_dword +}; + +#endif /* !CONFIG_MULTIQUAD */ +#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ + (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) + +static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */ +{ + unsigned long flags; + + if (bus > 255 || dev > 31 || fn > 7 || reg > 255) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + 
outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); + + switch (len) { + case 1: + *value = inb(0xCFC + (reg & 3)); + break; + case 2: + *value = inw(0xCFC + (reg & 2)); + break; + case 4: + *value = inl(0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */ +{ + unsigned long flags; + + if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); + + switch (len) { + case 1: + outb((u8)value, 0xCFC + (reg & 3)); + break; + case 2: + outw((u16)value, 0xCFC + (reg & 2)); + break; + case 4: + outl((u32)value, 0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF1_ADDRESS + +static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + + result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + + *value = (u8)data; + + return result; +} + +static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + + result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + + *value = (u16)data; + + return result; +} + +static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+ PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static struct pci_ops pci_direct_conf1 = { + pci_conf1_read_config_byte, + pci_conf1_read_config_word, + pci_conf1_read_config_dword, + pci_conf1_write_config_byte, + pci_conf1_write_config_word, + pci_conf1_write_config_dword +}; + + +/* + * Functions for accessing PCI configuration space with type 2 accesses + */ + +#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) + +static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (bus > 255 || dev > 31 || fn > 7 || reg > 255) + return -EINVAL; + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + *value = inb(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + *value = inw(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + *value = inl(PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb (0, 0xCF8); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) + return -EINVAL; + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb (0, 0xCF8); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef 
PCI_CONF2_ADDRESS + +static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + *value = (u8)data; + return result; +} + +static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + *value = (u16)data; + return result; +} + +static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static struct pci_ops pci_direct_conf2 = { + pci_conf2_read_config_byte, + pci_conf2_read_config_word, + pci_conf2_read_config_dword, + pci_conf2_write_config_byte, + pci_conf2_write_config_word, + pci_conf2_write_config_dword +}; + + +/* + * Before we decide to use direct hardware access mechanisms, we try to do some + * trivial checks to ensure it at least _seems_ to be working -- we just test + * whether bus 00 contains a host bridge (this is similar to checking + * techniques used in XFree86, but ours should be more reliable since we + * attempt to make use of direct access hints provided by the PCI BIOS). 
+ * + * This should be close to trivial, but it isn't, because there are buggy + * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. + */ +static int __devinit pci_sanity_check(struct pci_ops *o) +{ + u16 x; + struct pci_bus bus; /* Fake bus and device */ + struct pci_dev dev; + + if (pci_probe & PCI_NO_CHECKS) + return 1; + bus.number = 0; + dev.bus = &bus; + for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++) + if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) && + (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) || + (!o->read_word(&dev, PCI_VENDOR_ID, &x) && + (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ))) + return 1; + DBG("PCI: Sanity check failed\n"); + return 0; +} + +static struct pci_ops * __devinit pci_check_direct(void) +{ + unsigned int tmp; + unsigned long flags; + + __save_flags(flags); __cli(); + + /* + * Check if configuration type 1 works. + */ + if (pci_probe & PCI_PROBE_CONF1) { + outb (0x01, 0xCFB); + tmp = inl (0xCF8); + outl (0x80000000, 0xCF8); + if (inl (0xCF8) == 0x80000000 && + pci_sanity_check(&pci_direct_conf1)) { + outl (tmp, 0xCF8); + __restore_flags(flags); + printk(KERN_INFO "PCI: Using configuration type 1\n"); + request_region(0xCF8, 8, "PCI conf1"); + +#ifdef CONFIG_MULTIQUAD + /* Multi-Quad has an extended PCI Conf1 */ + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + return &pci_direct_mq_conf1; +#endif + return &pci_direct_conf1; + } + outl (tmp, 0xCF8); + } + + /* + * Check if configuration type 2 works. + */ + if (pci_probe & PCI_PROBE_CONF2) { + outb (0x00, 0xCFB); + outb (0x00, 0xCF8); + outb (0x00, 0xCFA); + if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 && + pci_sanity_check(&pci_direct_conf2)) { + __restore_flags(flags); + printk(KERN_INFO "PCI: Using configuration type 2\n"); + request_region(0xCF8, 4, "PCI conf2"); + return &pci_direct_conf2; + } + } + + __restore_flags(flags); + return NULL; +} + +#endif + +/* + * BIOS32 and PCI BIOS handling. 
+ */ + +#ifdef CONFIG_PCI_BIOS + +#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX +#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 +#define PCIBIOS_FIND_PCI_DEVICE 0xb102 +#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 +#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 +#define PCIBIOS_READ_CONFIG_BYTE 0xb108 +#define PCIBIOS_READ_CONFIG_WORD 0xb109 +#define PCIBIOS_READ_CONFIG_DWORD 0xb10a +#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b +#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c +#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d +#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e +#define PCIBIOS_SET_PCI_HW_INT 0xb10f + +/* BIOS32 signature: "_32_" */ +#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) + +/* PCI signature: "PCI " */ +#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) + +/* PCI service signature: "$PCI" */ +#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) + +/* PCI BIOS hardware mechanism flags */ +#define PCIBIOS_HW_TYPE1 0x01 +#define PCIBIOS_HW_TYPE2 0x02 +#define PCIBIOS_HW_TYPE1_SPEC 0x10 +#define PCIBIOS_HW_TYPE2_SPEC 0x20 + +/* + * This is the standard structure used to identify the entry point + * to the BIOS32 Service Directory, as documented in + * Standard BIOS 32-bit Service Directory Proposal + * Revision 0.4 May 24, 1993 + * Phoenix Technologies Ltd. + * Norwood, MA + * and the PCI BIOS specification. + */ + +union bios32 { + struct { + unsigned long signature; /* _32_ */ + unsigned long entry; /* 32 bit physical address */ + unsigned char revision; /* Revision level, 0 */ + unsigned char length; /* Length in paragraphs should be 01 */ + unsigned char checksum; /* All bytes must add up to zero */ + unsigned char reserved[5]; /* Must be zero */ + } fields; + char chars[16]; +}; + +/* + * Physical address of the service directory. 
I don't know if we're + * allowed to have more than one of these or not, so just in case + * we'll make pcibios_present() take a memory start parameter and store + * the array there. + */ + +static struct { + unsigned long address; + unsigned short segment; +} bios32_indirect = { 0, __KERNEL_CS }; + +/* + * Returns the entry point for the given service, NULL on error + */ + +static unsigned long bios32_service(unsigned long service) +{ + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + unsigned long flags; + + __save_flags(flags); __cli(); + __asm__("lcall *(%%edi); cld" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), + "D" (&bios32_indirect)); + __restore_flags(flags); + + switch (return_code) { + case 0: + return address + entry; + case 0x80: /* Not present */ + printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", + service, return_code); + return 0; + } +} + +static struct { + unsigned long address; + unsigned short segment; +} pci_indirect = { 0, __KERNEL_CS }; + +static int pci_bios_present; + +static int __devinit check_pcibios(void) +{ + u32 signature, eax, ebx, ecx; + u8 status, major_ver, minor_ver, hw_mech; + unsigned long flags, pcibios_entry; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) { + pci_indirect.address = pcibios_entry + PAGE_OFFSET; + + __save_flags(flags); __cli(); + __asm__( + "lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=d" (signature), + "=a" (eax), + "=b" (ebx), + "=c" (ecx) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), + "D" (&pci_indirect) + : "memory"); + __restore_flags(flags); + + status = (eax >> 8) & 0xff; + hw_mech = eax & 0xff; + major_ver = (ebx >> 8) & 0xff; + minor_ver = ebx & 0xff; + if (pcibios_last_bus < 0) + 
pcibios_last_bus = ecx & 0xff; + DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n", + status, hw_mech, major_ver, minor_ver, pcibios_last_bus); + if (status || signature != PCI_SIGNATURE) { + printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n", + status, signature); + return 0; + } + printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n", + major_ver, minor_ver, pcibios_entry, pcibios_last_bus); +#ifdef CONFIG_PCI_DIRECT + if (!(hw_mech & PCIBIOS_HW_TYPE1)) + pci_probe &= ~PCI_PROBE_CONF1; + if (!(hw_mech & PCIBIOS_HW_TYPE2)) + pci_probe &= ~PCI_PROBE_CONF2; +#endif + return 1; + } + return 0; +} + +static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id, + unsigned short index, unsigned char *bus, unsigned char *device_fn) +{ + unsigned short bx; + unsigned short ret; + + __asm__("lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=b" (bx), + "=a" (ret) + : "1" (PCIBIOS_FIND_PCI_DEVICE), + "c" (device_id), + "d" (vendor), + "S" ((int) index), + "D" (&pci_indirect)); + *bus = (bx >> 8) & 0xff; + *device_fn = bx & 0xff; + return (int) (ret & 0xff00) >> 8; +} + +static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = ((bus << 8) | (dev << 3) | fn); + + if (bus > 255 || dev > 31 || fn > 7 || reg > 255) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); 
cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + +static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = ((bus << 8) | (dev << 3) | fn); + + if ((bus > 255 || dev > 31 || fn > 7 || reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_BYTE), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_WORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_DWORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + +static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + + if (!value) + BUG(); + + result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + + *value = (u8)data; + + return result; +} + +static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + + if (!value) + BUG(); + + result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + + *value = (u16)data; + + return result; +} + +static 
int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + if (!value) + BUG(); + + return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + + +/* + * Function table for BIOS32 access + */ + +static struct pci_ops pci_bios_access = { + pci_bios_read_config_byte, + pci_bios_read_config_word, + pci_bios_read_config_dword, + pci_bios_write_config_byte, + pci_bios_write_config_word, + pci_bios_write_config_dword +}; + +/* + * Try to find PCI BIOS. + */ + +static struct pci_ops * __devinit pci_find_bios(void) +{ + union bios32 *check; + unsigned char sum; + int i, length; + + /* + * Follow the standard procedure for locating the BIOS32 Service + * directory by scanning the permissible address range from + * 0xe0000 through 0xfffff for a valid BIOS32 structure. 
+ */ + + for (check = (union bios32 *) __va(0xe0000); + check <= (union bios32 *) __va(0xffff0); + ++check) { + if (check->fields.signature != BIOS32_SIGNATURE) + continue; + length = check->fields.length * 16; + if (!length) + continue; + sum = 0; + for (i = 0; i < length ; ++i) + sum += check->chars[i]; + if (sum != 0) + continue; + if (check->fields.revision != 0) { + printk("PCI: unsupported BIOS32 revision %d at 0x%p\n", + check->fields.revision, check); + continue; + } + DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); + if (check->fields.entry >= 0x100000) { + printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check); + return NULL; + } else { + unsigned long bios32_entry = check->fields.entry; + DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); + bios32_indirect.address = bios32_entry + PAGE_OFFSET; + if (check_pcibios()) + return &pci_bios_access; + } + break; /* Hopefully more than one BIOS32 cannot happen... */ + } + + return NULL; +} + +/* + * Sort the device list according to PCI BIOS. Nasty hack, but since some + * fool forgot to define the `correct' device order in the PCI BIOS specs + * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels + * which used BIOS ordering, we are bound to do this... 
+ */ + +static void __devinit pcibios_sort(void) +{ + LIST_HEAD(sorted_devices); + struct list_head *ln; + struct pci_dev *dev, *d; + int idx, found; + unsigned char bus, devfn; + + DBG("PCI: Sorting device list...\n"); + while (!list_empty(&pci_devices)) { + ln = pci_devices.next; + dev = pci_dev_g(ln); + idx = found = 0; + while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) { + idx++; + for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) { + d = pci_dev_g(ln); + if (d->bus->number == bus && d->devfn == devfn) { + list_del(&d->global_list); + list_add_tail(&d->global_list, &sorted_devices); + if (d == dev) + found = 1; + break; + } + } + if (ln == &pci_devices) { + printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn); + /* + * We must not continue scanning as several buggy BIOSes + * return garbage after the last device. Grr. + */ + break; + } + } + if (!found) { + printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n", + dev->bus->number, dev->devfn); + list_del(&dev->global_list); + list_add_tail(&dev->global_list, &sorted_devices); + } + } + list_splice(&sorted_devices, &pci_devices); +} + +/* + * BIOS Functions for IRQ Routing + */ + +struct irq_routing_options { + u16 size; + struct irq_info *table; + u16 segment; +} __attribute__((packed)); + +struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) +{ + struct irq_routing_options opt; + struct irq_routing_table *rt = NULL; + int ret, map; + unsigned long page; + + if (!pci_bios_present) + return NULL; + page = __get_free_page(GFP_KERNEL); + if (!page) + return NULL; + opt.table = (struct irq_info *) page; + opt.size = PAGE_SIZE; + opt.segment = __KERNEL_DS; + + DBG("PCI: Fetching IRQ routing table... 
"); + __asm__("push %%es\n\t" + "push %%ds\n\t" + "pop %%es\n\t" + "lcall *(%%esi); cld\n\t" + "pop %%es\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret), + "=b" (map), + "+m" (opt) + : "0" (PCIBIOS_GET_ROUTING_OPTIONS), + "1" (0), + "D" ((long) &opt), + "S" (&pci_indirect)); + DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); + if (ret & 0xff00) + printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); + else if (opt.size) { + rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL); + if (rt) { + memset(rt, 0, sizeof(struct irq_routing_table)); + rt->size = opt.size + sizeof(struct irq_routing_table); + rt->exclusive_irqs = map; + memcpy(rt->slots, (void *) page, opt.size); + printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n"); + } + } + free_page(page); + return rt; +} + + +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) +{ + int ret; + + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_SET_PCI_HW_INT), + "b" ((dev->bus->number << 8) | dev->devfn), + "c" ((irq << 8) | (pin + 10)), + "S" (&pci_indirect)); + return !(ret & 0xff00); +} + +#endif + +/* + * Several buggy motherboards address only 16 devices and mirror + * them to next 16 IDs. We try to detect this `feature' on all + * primary buses (those containing host bridges as they are + * expected to be unique) and remove the ghost devices. 
+ */ + +static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) +{ + struct list_head *ln, *mn; + struct pci_dev *d, *e; + int mirror = PCI_DEVFN(16,0); + int seen_host_bridge = 0; + int i; + + DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); + for (ln=b->devices.next; ln != &b->devices; ln=ln->next) { + d = pci_dev_b(ln); + if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) + seen_host_bridge++; + for (mn=ln->next; mn != &b->devices; mn=mn->next) { + e = pci_dev_b(mn); + if (e->devfn != d->devfn + mirror || + e->vendor != d->vendor || + e->device != d->device || + e->class != d->class) + continue; + for(i=0; iresource[i].start != d->resource[i].start || + e->resource[i].end != d->resource[i].end || + e->resource[i].flags != d->resource[i].flags) + continue; + break; + } + if (mn == &b->devices) + return; + } + if (!seen_host_bridge) + return; + printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); + + ln = &b->devices; + while (ln->next != &b->devices) { + d = pci_dev_b(ln->next); + if (d->devfn >= mirror) { + list_del(&d->global_list); + list_del(&d->bus_list); + kfree(d); + } else + ln = ln->next; + } +} + +/* + * Discover remaining PCI buses in case there are peer host bridges. + * We use the number of last PCI bus provided by the PCI BIOS. 
+ */ +static void __devinit pcibios_fixup_peer_bridges(void) +{ + int n; + struct pci_bus bus; + struct pci_dev dev; + u16 l; + + if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) + return; + DBG("PCI: Peer bridge fixup\n"); + for (n=0; n <= pcibios_last_bus; n++) { + if (pci_bus_exists(&pci_root_buses, n)) + continue; + bus.number = n; + bus.ops = pci_root_ops; + dev.bus = &bus; + for(dev.devfn=0; dev.devfn<256; dev.devfn += 8) + if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l); + printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); + pci_scan_bus(n, pci_root_ops, NULL); + break; + } + } +} + +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + */ + +static void __devinit pci_fixup_i450nx(struct pci_dev *d) +{ + /* + * i450NX -- Find and scan all secondary buses on all PXB's. + */ + int pxb, reg; + u8 busno, suba, subb; +#ifdef CONFIG_MULTIQUAD + int quad = BUS2QUAD(d->bus->number); +#endif + printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name); + reg = 0xd0; + for(pxb=0; pxb<2; pxb++) { + pci_read_config_byte(d, reg++, &busno); + pci_read_config_byte(d, reg++, &suba); + pci_read_config_byte(d, reg++, &subb); + DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + if (busno) + pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */ + if (suba < subb) + pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */ + } + pcibios_last_bus = -1; +} + +static void __devinit pci_fixup_i450gx(struct pci_dev *d) +{ + /* + * i450GX and i450KX -- Find and scan all secondary buses. 
+ * (called separately for each PCI bridge found) + */ + u8 busno; + pci_read_config_byte(d, 0x4a, &busno); + printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno); + pci_scan_bus(busno, pci_root_ops, NULL); + pcibios_last_bus = -1; +} + +static void __devinit pci_fixup_umc_ide(struct pci_dev *d) +{ + /* + * UM8886BF IDE controller sets region type bits incorrectly, + * therefore they look like memory despite of them being I/O. + */ + int i; + + printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name); + for(i=0; i<4; i++) + d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO; +} + +static void __devinit pci_fixup_ncr53c810(struct pci_dev *d) +{ + /* + * NCR 53C810 returns class code 0 (at least on some systems). + * Fix class to be PCI_CLASS_STORAGE_SCSI + */ + if (!d->class) { + printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name); + d->class = PCI_CLASS_STORAGE_SCSI << 8; + } +} + +static void __devinit pci_fixup_ide_bases(struct pci_dev *d) +{ + int i; + + /* + * PCI IDE controllers use non-standard I/O port decoding, respect it. + */ + if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE) + return; + DBG("PCI: IDE base address fixup for %s\n", d->slot_name); + for(i=0; i<4; i++) { + struct resource *r = &d->resource[i]; + if ((r->start & ~0x80) == 0x374) { + r->start |= 2; + r->end = r->start; + } + } +} + +static void __devinit pci_fixup_ide_trash(struct pci_dev *d) +{ + int i; + + /* + * There exist PCI IDE controllers which have utter garbage + * in first four base registers. Ignore that. + */ + DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name); + for(i=0; i<4; i++) + d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0; +} + +static void __devinit pci_fixup_latency(struct pci_dev *d) +{ + /* + * SiS 5597 and 5598 chipsets require latency timer set to + * at most 32 to avoid lockups. 
+ */ + DBG("PCI: Setting max latency to 32\n"); + pcibios_max_latency = 32; +} + +static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) +{ + /* + * PIIX4 ACPI device: hardwired IRQ9 + */ + d->irq = 9; +} + +/* + * Addresses issues with problems in the memory write queue timer in + * certain VIA Northbridges. This bugfix is per VIA's specifications, + * except for the KL133/KM133: clearing bit 5 on those Northbridges seems + * to trigger a bug in its integrated ProSavage video card, which + * causes screen corruption. We only clear bits 6 and 7 for that chipset, + * until VIA can provide us with definitive information on why screen + * corruption occurs, and what exactly those bits do. + * + * VIA 8363,8622,8361 Northbridges: + * - bits 5, 6, 7 at offset 0x55 need to be turned off + * VIA 8367 (KT266x) Northbridges: + * - bits 5, 6, 7 at offset 0x95 need to be turned off + * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges: + * - bits 6, 7 at offset 0x55 need to be turned off + */ + +#define VIA_8363_KL133_REVISION_ID 0x81 +#define VIA_8363_KM133_REVISION_ID 0x84 + +static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d) +{ + u8 v; + u8 revision; + int where = 0x55; + int mask = 0x1f; /* clear bits 5, 6, 7 by default */ + + pci_read_config_byte(d, PCI_REVISION_ID, &revision); + + if (d->device == PCI_DEVICE_ID_VIA_8367_0) { + /* fix pci bus latency issues resulted by NB bios error + it appears on bug free^Wreduced kt266x's bios forces + NB latency to zero */ + pci_write_config_byte(d, PCI_LATENCY_TIMER, 0); + + where = 0x95; /* the memory write queue timer register is + different for the KT266x's: 0x95 not 0x55 */ + } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 && + (revision == VIA_8363_KL133_REVISION_ID || + revision == VIA_8363_KM133_REVISION_ID)) { + mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5 + causes screen corruption on the KL133/KM133 */ + } + + pci_read_config_byte(d, where, &v); + if (v & ~mask) { + printk("Disabling VIA 
memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \ + d->device, revision, where, v, mask, v & mask); + v &= mask; + pci_write_config_byte(d, where, v); + } +} + +/* + * For some reasons Intel decided that certain parts of their + * 815, 845 and some other chipsets must look like PCI-to-PCI bridges + * while they are obviously not. The 82801 family (AA, AB, BAM/CAM, + * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according + * to Intel terminology. These devices do forward all addresses from + * system to PCI bus no matter what are their window settings, so they are + * "transparent" (or subtractive decoding) from programmers point of view. + */ +static void __init pci_fixup_transparent_bridge(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (dev->device & 0xff00) == 0x2400) + dev->transparent = 1; +} + +struct pci_fixup pcibios_fixups[] = { + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash }, + { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, 
pci_fixup_via_northbridge_bug }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge }, + { 0 } +}; + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void __devinit pcibios_fixup_bus(struct pci_bus *b) +{ + pcibios_fixup_ghosts(b); + pci_read_bridge_bases(b); +} + +struct pci_bus * __devinit pcibios_scan_root(int busnum) +{ + struct list_head *list; + struct pci_bus *bus; + + list_for_each(list, &pci_root_buses) { + bus = pci_bus_b(list); + if (bus->number == busnum) { + /* Already scanned */ + return bus; + } + } + + printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); + + return pci_scan_bus(busnum, pci_root_ops, NULL); +} + +void __devinit pcibios_config_init(void) +{ + /* + * Try all known PCI access methods. Note that we support using + * both PCI BIOS and direct access, with a preference for direct. + */ + +#ifdef CONFIG_PCI_DIRECT + struct pci_ops *tmp = NULL; +#endif + + +#ifdef CONFIG_PCI_BIOS + if ((pci_probe & PCI_PROBE_BIOS) + && ((pci_root_ops = pci_find_bios()))) { + pci_probe |= PCI_BIOS_SORT; + pci_bios_present = 1; + pci_config_read = pci_bios_read; + pci_config_write = pci_bios_write; + } +#endif + +#ifdef CONFIG_PCI_DIRECT + if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2)) + && (tmp = pci_check_direct())) { + pci_root_ops = tmp; + if (pci_root_ops == &pci_direct_conf1) { + pci_config_read = pci_conf1_read; + pci_config_write = pci_conf1_write; + } + else { + pci_config_read = pci_conf2_read; + pci_config_write = pci_conf2_write; + } + } +#endif + + return; +} + +void __init pcibios_init(void) +{ + int quad; + + if (!pci_root_ops) + pcibios_config_init(); + if (!pci_root_ops) { + printk(KERN_WARNING "PCI: System does not support PCI\n"); + return; + } + + pcibios_set_cacheline_size(); + + printk(KERN_INFO "PCI: Probing PCI hardware\n"); +#ifdef CONFIG_ACPI_PCI + if 
(!acpi_noirq && !acpi_pci_irq_init()) { + pci_using_acpi_prt = 1; + printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi' or even 'acpi=off'\n"); + } +#endif + if (!pci_using_acpi_prt) { + pci_root_bus = pcibios_scan_root(0); + pcibios_irq_init(); + pcibios_fixup_peer_bridges(); + pcibios_fixup_irqs(); + } + if (clustered_apic_mode && (numnodes > 1)) { + for (quad = 1; quad < numnodes; ++quad) { + printk("Scanning PCI bus %d for quad %d\n", + QUADLOCAL2BUS(quad,0), quad); + pci_scan_bus(QUADLOCAL2BUS(quad,0), + pci_root_ops, NULL); + } + } + + pcibios_resource_survey(); + +#ifdef CONFIG_PCI_BIOS + if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) + pcibios_sort(); +#endif +} + +char * __devinit pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } +#ifdef CONFIG_PCI_BIOS + else if (!strcmp(str, "bios")) { + pci_probe = PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nobios")) { + pci_probe &= ~PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nosort")) { + pci_probe |= PCI_NO_SORT; + return NULL; + } else if (!strcmp(str, "biosirq")) { + pci_probe |= PCI_BIOS_IRQ_SCAN; + return NULL; + } +#endif +#ifdef CONFIG_PCI_DIRECT + else if (!strcmp(str, "conf1")) { + pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; + return NULL; + } + else if (!strcmp(str, "conf2")) { + pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; + return NULL; + } +#endif + else if (!strcmp(str, "rom")) { + pci_probe |= PCI_ASSIGN_ROMS; + return NULL; + } else if (!strcmp(str, "assign-busses")) { + pci_probe |= PCI_ASSIGN_ALL_BUSSES; + return NULL; + } else if (!strncmp(str, "irqmask=", 8)) { + pcibios_irq_mask = simple_strtol(str+8, NULL, 0); + return NULL; + } else if (!strncmp(str, "lastbus=", 8)) { + pcibios_last_bus = simple_strtol(str+8, NULL, 0); + return NULL; + } else if (!strncmp(str, "noacpi", 6)) { + acpi_noirq_set(); + return NULL; + } + return 
str; +} + +unsigned int pcibios_assign_all_busses(void) +{ + return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + int err; + + if ((err = pcibios_enable_resources(dev, mask)) < 0) + return err; + +#ifdef CONFIG_ACPI_PCI + if (pci_using_acpi_prt) { + acpi_pci_irq_enable(dev); + return 0; + } +#endif + + pcibios_enable_irq(dev); + + return 0; +} diff --git a/xen/arch/x86/pci-x86.c b/xen/arch/x86/pci-x86.c new file mode 100644 index 0000000000..7efc79c2da --- /dev/null +++ b/xen/arch/x86/pci-x86.c @@ -0,0 +1,402 @@ +/* + * Low-Level PCI Access for i386 machines + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * Drew@Colorado.EDU + * +1 (303) 786-7975 + * + * Drew's work was sponsored by: + * iX Multiuser Multitasking Magazine + * Hannover, Germany + * hm@ix.de + * + * Copyright 1997--2000 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + * + * CHANGELOG : + * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION + * Revision 2.0 present on 's ASUS mainboard. + * + * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic + * Potter, potter@cao-vlsi.ibp.fr + * + * Jan 10, 1995 : Modified to store the information about configured pci + * devices into a list, which can be accessed via /proc/pci by + * Curtis Varner, cvarner@cs.ucr.edu + * + * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter. + * Alpha version. Intel & UMC chipset support only. + * + * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code + * moved to drivers/pci/pci.c. 
+ * + * Dec 7, 1996 : Added support for direct configuration access of boards + * with Intel compatible access schemes (tsbogend@alpha.franken.de) + * + * Feb 3, 1997 : Set internal functions to static, save/restore flags + * avoid dead locks reading broken PCI BIOS, werner@suse.de + * + * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS + * (mj@atrey.karlin.mff.cuni.cz) + * + * May 7, 1997 : Added some missing cli()'s. [mj] + * + * Jun 20, 1997 : Corrected problems in "conf1" type accesses. + * (paubert@iram.es) + * + * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts + * and cleaned it up... Martin Mares + * + * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj] + * + * May 1, 1998 : Support for peer host bridges. [mj] + * + * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space + * can be accessed from interrupts even on SMP systems. [mj] + * + * August 1998 : Better support for peer host bridges and more paranoid + * checks for direct hardware access. Ugh, this file starts to look as + * a large gallery of common hardware bug workarounds (watch the comments) + * -- the PCI specs themselves are sane, but most implementors should be + * hit hard with \hammer scaled \magstep5. [mj] + * + * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj] + * + * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj] + * + * August 1999 : New resource management and configuration access stuff. [mj] + * + * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges. + * Based on ideas by Chris Frantz and David Hinds. [mj] + * + * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi + * for a lot of patience during testing. [mj] + * + * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. 
[mj] + */ + +#include +#include +#include +#include +#include +#include + +#include "pci-x86.h" + +void +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) +{ + u32 new, check; + int reg; + + new = res->start | (res->flags & PCI_REGION_FLAG_MASK); + if (resource < 6) { + reg = PCI_BASE_ADDRESS_0 + 4*resource; + } else if (resource == PCI_ROM_RESOURCE) { + res->flags |= PCI_ROM_ADDRESS_ENABLE; + new |= PCI_ROM_ADDRESS_ENABLE; + reg = dev->rom_base_reg; + } else { + /* Somebody might have asked allocation of a non-standard resource */ + return; + } + + pci_write_config_dword(dev, reg, new); + pci_read_config_dword(dev, reg, &check); + if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { + printk(KERN_ERR "PCI: Error while updating region " + "%s/%d (%08x != %08x)\n", dev->slot_name, resource, + new, check); + } +} + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void +pcibios_align_resource(void *data, struct resource *res, + unsigned long size, unsigned long align) +{ + if (res->flags & IORESOURCE_IO) { + unsigned long start = res->start; + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} + + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. 
+ * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. + * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. 
+ */ + +static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct list_head *ln; + struct pci_bus *bus; + struct pci_dev *dev; + int idx; + struct resource *r, *pr; + + /* Depth-First Search on bus tree */ + for (ln=bus_list->next; ln != bus_list; ln=ln->next) { + bus = pci_bus_b(ln); + if ((dev = bus->self)) { + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->start) + continue; + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) + printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name); + } + } + pcibios_allocate_bus_resources(&bus->children); + } +} + +static void __init pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev; + int idx, disabled; + u16 command; + struct resource *r, *pr; + + pci_for_each_dev(dev) { + pci_read_config_word(dev, PCI_COMMAND, &command); + for(idx = 0; idx < 6; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->start) /* Address not assigned at all */ + continue; + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) { + DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n", + r->start, r->end, r->flags, disabled, pass); + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name); + /* We'll assign a new address later */ + r->end -= r->start; + r->start = 0; + } + } + } + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & PCI_ROM_ADDRESS_ENABLE) { + /* Turn the ROM off, leave the resource region, but keep it unregistered. 
*/ + u32 reg; + DBG("PCI: Switching off ROM of %s\n", dev->slot_name); + r->flags &= ~PCI_ROM_ADDRESS_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } + } +} + +static void __init pcibios_assign_resources(void) +{ + struct pci_dev *dev; + int idx; + struct resource *r; + + pci_for_each_dev(dev) { + int class = dev->class >> 8; + + /* Don't touch classless devices and host bridges */ + if (!class || class == PCI_CLASS_BRIDGE_HOST) + continue; + + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + + /* + * Don't touch IDE controllers and I/O ports of video cards! + */ + if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || + (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) + continue; + + /* + * We shall assign a new address to this resource, either because + * the BIOS forgot to do so or because we have decided the old + * address was unusable for some reason. + */ + if (!r->start && r->end) + pci_assign_resource(dev, idx); + } + + if (pci_probe & PCI_ASSIGN_ROMS) { + r = &dev->resource[PCI_ROM_RESOURCE]; + r->end -= r->start; + r->start = 0; + if (r->end) + pci_assign_resource(dev, PCI_ROM_RESOURCE); + } + } +} + +void __init pcibios_set_cacheline_size(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + pci_cache_line_size = 32 >> 2; + if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) + pci_cache_line_size = 64 >> 2; /* K7 & K8 */ + else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) + pci_cache_line_size = 128 >> 2; /* P4 */ +} + +void __init pcibios_resource_survey(void) +{ + DBG("PCI: Allocating resources\n"); + pcibios_allocate_bus_resources(&pci_root_buses); + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + pcibios_assign_resources(); +} + +int pcibios_enable_resources(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = 
cmd; + for(idx=0; idx<6; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1<resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain crappy BIOSes forget to set it properly. + */ +unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +#if 0 +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + unsigned long prot; + + /* I/O space cannot be accessed via normal processor loads and + * stores on this platform. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* Leave vm_pgoff as-is, the PCI space address is the physical + * address on this platform. + */ + vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO); + + prot = pgprot_val(vma->vm_page_prot); + if (boot_cpu_data.x86 > 3) + prot |= _PAGE_PCD | _PAGE_PWT; + vma->vm_page_prot = __pgprot(prot); + + /* Write-combine setting is ignored, it is changed via the mtrr + * interfaces on this platform. 
+ */ + if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} +#endif diff --git a/xen/arch/x86/pci-x86.h b/xen/arch/x86/pci-x86.h new file mode 100644 index 0000000000..fe70b10166 --- /dev/null +++ b/xen/arch/x86/pci-x86.h @@ -0,0 +1,71 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_NO_SORT 0x0100 +#define PCI_BIOS_SORT 0x0200 +#define PCI_NO_CHECKS 0x0400 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 + +extern unsigned int pci_probe; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; +extern u8 pci_cache_line_size; + +void pcibios_resource_survey(void); +void pcibios_set_cacheline_size(void); +int pcibios_enable_resources(struct pci_dev *, int); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops *pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give zero */ + struct irq_info slots[0]; +} 
__attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +void pcibios_irq_init(void); +void pcibios_fixup_irqs(void); +void pcibios_enable_irq(struct pci_dev *dev); diff --git a/xen/arch/x86/pdb-linux.c b/xen/arch/x86/pdb-linux.c new file mode 100644 index 0000000000..fd0fc5ed78 --- /dev/null +++ b/xen/arch/x86/pdb-linux.c @@ -0,0 +1,100 @@ + +/* + * pervasive debugger + * www.cl.cam.ac.uk/netos/pdb + * + * alex ho + * 2004 + * university of cambridge computer laboratory + * + * linux & i386 dependent code. bleech. + */ + +#include + +/* offset to the first instruction in the linux system call code + where we can safely set a breakpoint */ +unsigned int pdb_linux_syscall_enter_bkpt_offset = 20; + +/* offset to eflags saved on the stack after an int 80 */ +unsigned int pdb_linux_syscall_eflags_offset = 48; + +/* offset to the instruction pointer saved on the stack after an int 80 */ +unsigned int pdb_linux_syscall_eip_offset = 40; + +unsigned char +pdb_linux_set_bkpt (unsigned long addr) +{ + unsigned char old_instruction = *(unsigned char *)addr; + *(unsigned char *)addr = 0xcc; + return old_instruction; +} + +void +pdb_linux_clr_bkpt (unsigned long addr, unsigned char value) +{ + *(unsigned char *)addr = value; +} + +void +pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code, + trap_info_t *ti) +{ + /* set at breakpoint at the beginning of the + system call in the target domain */ + + pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address + + pdb_linux_syscall_enter_bkpt_offset); + pdb_system_call = 1; +} + +void +pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, struct pdb_context *pdb_ctx) +{ + /* + we've hit an int 0x80 in a user's program, jumped into xen + (traps.c::do_general_protection()) which re-wrote the next + instruction in the os kernel to 0xcc, and then hit that + exception. 
+ + we need to re-write the return instruction in the user's + program so that we know when we have finished the system call + and are back in the user's program. + + at this point our stack should look something like this: + + esp = 0x80a59f0 + esp + 4 = 0x0 + esp + 8 = 0x80485a0 + esp + 12 = 0x2d + esp + 16 = 0x80485f4 + esp + 20 = 0xbffffa48 + esp + 24 = 0xd + esp + 28 = 0xc00a0833 + esp + 32 = 0x833 + esp + 36 = 0xd + esp + 40 = 0x804dcdd saved eip + esp + 44 = 0x82b saved cs + esp + 48 = 0x213392 saved eflags + esp + 52 = 0xbffffa2c saved esp + esp + 56 = 0x833 saved ss + esp + 60 = 0x1000000 + */ + + /* restore the entry instruction for the system call */ + pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr); + + /* save the address of eflags that was saved on the stack */ + pdb_system_call_eflags_addr = (regs->esp + + pdb_linux_syscall_eflags_offset); + + /* muck with the return instruction so that we trap back into the + debugger when re-entering user space */ + pdb_system_call_next_addr = *(unsigned long *)(regs->esp + + pdb_linux_syscall_eip_offset); + pdb_linux_get_values (&pdb_system_call_leave_instr, 1, + pdb_system_call_next_addr, + pdb_ctx->process, pdb_ctx->ptbr); + pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr, + pdb_ctx->process, pdb_ctx->ptbr); +} diff --git a/xen/arch/x86/pdb-stub.c b/xen/arch/x86/pdb-stub.c new file mode 100644 index 0000000000..9997218e7f --- /dev/null +++ b/xen/arch/x86/pdb-stub.c @@ -0,0 +1,1335 @@ + +/* + * pervasive debugger + * www.cl.cam.ac.uk/netos/pdb + * + * alex ho + * 2004 + * university of cambridge computer laboratory + * + * code adapted originally from kgdb, nemesis, & gdbserver + */ + +#include +#include +#include +#include +#include +#include /* [un]map_domain_mem */ +#include +#include +#include +#include + +#undef PDB_DEBUG_TRACE +#ifdef PDB_DEBUG_TRACE +#define TRC(_x) _x +#else +#define TRC(_x) +#endif + +#define DEBUG_EXCEPTION 0x01 +#define BREAKPT_EXCEPTION 0x03 +#define 
PDB_LIVE_EXCEPTION 0x58 +#define KEYPRESS_EXCEPTION 0x88 + +#define BUFMAX 400 + +static const char hexchars[] = "0123456789abcdef"; + +static int remote_debug; + +#define PDB_BUFMAX 1024 +static char pdb_in_buffer[PDB_BUFMAX]; +static char pdb_out_buffer[PDB_BUFMAX]; +static char pdb_buffer[PDB_BUFMAX]; +static int pdb_in_buffer_ptr; +static unsigned char pdb_in_checksum; +static unsigned char pdb_xmit_checksum; + +struct pdb_context pdb_ctx; +int pdb_continue_thread = 0; +int pdb_general_thread = 0; + +void pdb_put_packet (unsigned char *buffer, int ack); +void pdb_bkpt_check (u_char *buffer, int length, + unsigned long cr3, unsigned long addr); + +int pdb_initialized = 0; +int pdb_page_fault_possible = 0; +int pdb_page_fault_scratch = 0; /* just a handy variable */ +int pdb_page_fault = 0; +static int pdb_serhnd = -1; +static int pdb_stepping = 0; + +int pdb_system_call = 0; +unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */ +unsigned char pdb_system_call_leave_instr = 0; /* original next instr */ +unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */ +unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */ + +static inline void pdb_put_char(unsigned char c) +{ + serial_putc(pdb_serhnd, c); +} + +static inline unsigned char pdb_get_char(void) +{ + return serial_getc(pdb_serhnd); +} + +int +get_char (char *addr) +{ + return *addr; +} + +void +set_char (char *addr, int val) +{ + *addr = val; +} + +void +pdb_process_query (char *ptr) +{ + if (strcmp(ptr, "C") == 0) + { + /* empty string */ + } + else if (strcmp(ptr, "fThreadInfo") == 0) + { +#ifdef PDB_PAST + struct task_struct *p; + u_long flags; +#endif /* PDB_PAST */ + + int buf_idx = 0; + + pdb_out_buffer[buf_idx++] = 'l'; + pdb_out_buffer[buf_idx++] = 0; + +#ifdef PDB_PAST + switch (pdb_level) + { + case PDB_LVL_XEN: /* return a list of domains */ + { + int count = 0; + + read_lock_irqsave (&tasklist_lock, flags); + + pdb_out_buffer[buf_idx++] = 'm'; 
+ for_each_domain ( p ) + { + domid_t domain = p->domain + PDB_ID_OFFSET; + + if (count > 0) + { + pdb_out_buffer[buf_idx++] = ','; + } + if (domain > 15) + { + pdb_out_buffer[buf_idx++] = hexchars[domain >> 4]; + } + pdb_out_buffer[buf_idx++] = hexchars[domain % 16]; + count++; + } + pdb_out_buffer[buf_idx++] = 0; + + read_unlock_irqrestore(&tasklist_lock, flags); + break; + } + case PDB_LVL_GUESTOS: /* return a list of processes */ + { + int foobar[20]; + int loop, total; + + /* this cr3 is wrong! */ + total = pdb_linux_process_list(pdb_ctx[pdb_level].info_cr3, + foobar, 20); + + pdb_out_buffer[buf_idx++] = 'm'; + pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */ + for (loop = 0; loop < total; loop++) + { + int pid = foobar[loop] + PDB_ID_OFFSET; + + pdb_out_buffer[buf_idx++] = ','; + if (pid > 15) + { + pdb_out_buffer[buf_idx++] = hexchars[pid >> 4]; + } + pdb_out_buffer[buf_idx++] = hexchars[pid % 16]; + } + pdb_out_buffer[buf_idx++] = 0; + break; + } + case PDB_LVL_PROCESS: /* hmmm... 
*/ + { + pdb_out_buffer[buf_idx++] = 'm'; + pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */ + break; + } + default: + break; + } +#endif /* PDB_PAST */ + + } + else if (strcmp(ptr, "sThreadInfo") == 0) + { + int buf_idx = 0; + + pdb_out_buffer[buf_idx++] = 'l'; + pdb_out_buffer[buf_idx++] = 0; + } + else if (strncmp(ptr, "ThreadExtraInfo,", 16) == 0) + { + int thread = 0; + char *message = "foobar ?"; + + ptr += 16; + if (hexToInt (&ptr, &thread)) + { + mem2hex (message, pdb_out_buffer, strlen(message) + 1); + } + +#ifdef PDB_PAST + int thread = 0; + char message[16]; + struct task_struct *p; + + p = find_domain_by_id(pdb_ctx[pdb_level].info); + strncpy (message, p->name, 16); + put_task_struct(p); + + ptr += 16; + if (hexToInt (&ptr, &thread)) + { + mem2hex ((char *)message, pdb_out_buffer, strlen(message) + 1); + } +#endif /* PDB_PAST */ + +#ifdef PDB_FUTURE + { + char string[task_struct_comm_length]; + + string[0] = 0; + pdb_linux_process_details (cr3, pid, string); + printk (" (%s)", string); + } +#endif /* PDB_FUTURE*/ + + } + else if (strcmp(ptr, "Offsets") == 0) + { + /* empty string */ + } + else if (strncmp(ptr, "Symbol", 6) == 0) + { + strcpy (pdb_out_buffer, "OK"); + } + else + { + printk("pdb: error, unknown query [%s]\n", ptr); + } +} + +void +pdb_x86_to_gdb_regs (char *buffer, struct pt_regs *regs) +{ + int idx = 0; + + mem2hex ((char *)®s->eax, &buffer[idx], sizeof(regs->eax)); + idx += sizeof(regs->eax) * 2; + mem2hex ((char *)®s->ecx, &buffer[idx], sizeof(regs->ecx)); + idx += sizeof(regs->ecx) * 2; + mem2hex ((char *)®s->edx, &buffer[idx], sizeof(regs->edx)); + idx += sizeof(regs->edx) * 2; + mem2hex ((char *)®s->ebx, &buffer[idx], sizeof(regs->ebx)); + idx += sizeof(regs->ebx) * 2; + mem2hex ((char *)®s->esp, &buffer[idx], sizeof(regs->esp)); + idx += sizeof(regs->esp) * 2; + mem2hex ((char *)®s->ebp, &buffer[idx], sizeof(regs->ebp)); + idx += sizeof(regs->ebp) * 2; + mem2hex ((char *)®s->esi, &buffer[idx], sizeof(regs->esi)); + idx += 
sizeof(regs->esi) * 2; + mem2hex ((char *)®s->edi, &buffer[idx], sizeof(regs->edi)); + idx += sizeof(regs->edi) * 2; + mem2hex ((char *)®s->eip, &buffer[idx], sizeof(regs->eip)); + idx += sizeof(regs->eip) * 2; + mem2hex ((char *)®s->eflags, &buffer[idx], sizeof(regs->eflags)); + idx += sizeof(regs->eflags) * 2; + mem2hex ((char *)®s->xcs, &buffer[idx], sizeof(regs->xcs)); + idx += sizeof(regs->xcs) * 2; + mem2hex ((char *)®s->xss, &buffer[idx], sizeof(regs->xss)); + idx += sizeof(regs->xss) * 2; + mem2hex ((char *)®s->xds, &buffer[idx], sizeof(regs->xds)); + idx += sizeof(regs->xds) * 2; + mem2hex ((char *)®s->xes, &buffer[idx], sizeof(regs->xes)); + idx += sizeof(regs->xes) * 2; + mem2hex ((char *)®s->xfs, &buffer[idx], sizeof(regs->xfs)); + idx += sizeof(regs->xfs) * 2; + mem2hex ((char *)®s->xgs, &buffer[idx], sizeof(regs->xgs)); +} + +/* at this point we allow any register to be changed, caveat emptor */ +void +pdb_gdb_to_x86_regs (struct pt_regs *regs, char *buffer) +{ + hex2mem(buffer, (char *)®s->eax, sizeof(regs->eax)); + buffer += sizeof(regs->eax) * 2; + hex2mem(buffer, (char *)®s->ecx, sizeof(regs->ecx)); + buffer += sizeof(regs->ecx) * 2; + hex2mem(buffer, (char *)®s->edx, sizeof(regs->edx)); + buffer += sizeof(regs->edx) * 2; + hex2mem(buffer, (char *)®s->ebx, sizeof(regs->ebx)); + buffer += sizeof(regs->ebx) * 2; + hex2mem(buffer, (char *)®s->esp, sizeof(regs->esp)); + buffer += sizeof(regs->esp) * 2; + hex2mem(buffer, (char *)®s->ebp, sizeof(regs->ebp)); + buffer += sizeof(regs->ebp) * 2; + hex2mem(buffer, (char *)®s->esi, sizeof(regs->esi)); + buffer += sizeof(regs->esi) * 2; + hex2mem(buffer, (char *)®s->edi, sizeof(regs->edi)); + buffer += sizeof(regs->edi) * 2; + hex2mem(buffer, (char *)®s->eip, sizeof(regs->eip)); + buffer += sizeof(regs->eip) * 2; + hex2mem(buffer, (char *)®s->eflags, sizeof(regs->eflags)); + buffer += sizeof(regs->eflags) * 2; + hex2mem(buffer, (char *)®s->xcs, sizeof(regs->xcs)); + buffer += sizeof(regs->xcs) * 2; + 
hex2mem(buffer, (char *)®s->xss, sizeof(regs->xss)); + buffer += sizeof(regs->xss) * 2; + hex2mem(buffer, (char *)®s->xds, sizeof(regs->xds)); + buffer += sizeof(regs->xds) * 2; + hex2mem(buffer, (char *)®s->xes, sizeof(regs->xes)); + buffer += sizeof(regs->xes) * 2; + hex2mem(buffer, (char *)®s->xfs, sizeof(regs->xfs)); + buffer += sizeof(regs->xfs) * 2; + hex2mem(buffer, (char *)®s->xgs, sizeof(regs->xgs)); +} + +int +pdb_process_command (char *ptr, struct pt_regs *regs, unsigned long cr3, + int sigval) +{ + int length; + unsigned long addr; + int ack = 1; /* wait for ack in pdb_put_packet */ + int go = 0; + + TRC(printf("pdb: [%s]\n", ptr)); + + pdb_out_buffer[0] = 0; + + if (pdb_ctx.valid == 1) + { + if (pdb_ctx.domain == -1) /* pdb context: xen */ + { + struct task_struct *p; + + p = &idle0_task; + if (p->mm.shadow_mode) + pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table); + else + pdb_ctx.ptbr = pagetable_val(p->mm.pagetable); + } + else if (pdb_ctx.process == -1) /* pdb context: guest os */ + { + struct task_struct *p; + + if (pdb_ctx.domain == -2) + { + p = find_last_domain(); + } + else + { + p = find_domain_by_id(pdb_ctx.domain); + } + if (p == NULL) + { + printk ("pdb error: unknown domain [0x%x]\n", pdb_ctx.domain); + strcpy (pdb_out_buffer, "E01"); + pdb_ctx.domain = -1; + goto exit; + } + if (p->mm.shadow_mode) + pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table); + else + pdb_ctx.ptbr = pagetable_val(p->mm.pagetable); + put_task_struct(p); + } + else /* pdb context: process */ + { + struct task_struct *p; + unsigned long domain_ptbr; + + p = find_domain_by_id(pdb_ctx.domain); + if (p == NULL) + { + printk ("pdb error: unknown domain [0x%x][0x%x]\n", + pdb_ctx.domain, pdb_ctx.process); + strcpy (pdb_out_buffer, "E01"); + pdb_ctx.domain = -1; + goto exit; + } + if (p->mm.shadow_mode) + domain_ptbr = pagetable_val(p->mm.shadow_table); + else + domain_ptbr = pagetable_val(p->mm.pagetable); + put_task_struct(p); + + pdb_ctx.ptbr = domain_ptbr; + 
/*pdb_ctx.ptbr=pdb_linux_pid_ptbr(domain_ptbr, pdb_ctx.process);*/ + } + + pdb_ctx.valid = 0; + TRC(printk ("pdb change context (dom:%d, proc:%d) now 0x%lx\n", + pdb_ctx.domain, pdb_ctx.process, pdb_ctx.ptbr)); + } + + switch (*ptr++) + { + case '?': + pdb_out_buffer[0] = 'S'; + pdb_out_buffer[1] = hexchars[sigval >> 4]; + pdb_out_buffer[2] = hexchars[sigval % 16]; + pdb_out_buffer[3] = 0; + break; + case 'S': /* step with signal */ + case 's': /* step */ + { + if ( pdb_system_call_eflags_addr != 0 ) + { + unsigned long eflags; + char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */ + + pdb_linux_get_values((u_char*)&eflags, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + eflags |= X86_EFLAGS_TF; + mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags)); + pdb_linux_set_values(eflags_buf, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + } + + regs->eflags |= X86_EFLAGS_TF; + pdb_stepping = 1; + return 1; + /* not reached */ + } + case 'C': /* continue with signal */ + case 'c': /* continue */ + { + if ( pdb_system_call_eflags_addr != 0 ) + { + unsigned long eflags; + char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */ + + pdb_linux_get_values((u_char*)&eflags, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + eflags &= ~X86_EFLAGS_TF; + mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags)); + pdb_linux_set_values(eflags_buf, sizeof(eflags), + pdb_system_call_eflags_addr, + pdb_ctx.process, pdb_ctx.ptbr); + } + + regs->eflags &= ~X86_EFLAGS_TF; + return 1; /* jump out before replying to gdb */ + /* not reached */ + } + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + break; + case 'D': /* detach */ + return go; + /* not reached */ + case 'g': /* return the value of the CPU registers */ + { + pdb_x86_to_gdb_regs (pdb_out_buffer, regs); + break; + } + case 'G': /* set the value of the CPU registers - return OK */ + { + 
pdb_gdb_to_x86_regs (regs, ptr); + break; + } + case 'H': + { + int thread; + char *next = &ptr[1]; + + if (hexToInt (&next, &thread)) + { + if (*ptr == 'c') + { + pdb_continue_thread = thread; + } + else if (*ptr == 'g') + { + pdb_general_thread = thread; + } + else + { + printk ("pdb error: unknown set thread command %c (%d)\n", + *ptr, thread); + strcpy (pdb_out_buffer, "E00"); + break; + } + } + strcpy (pdb_out_buffer, "OK"); + break; + } + case 'k': /* kill request */ + { + strcpy (pdb_out_buffer, "OK"); /* ack for fun */ + printk ("don't kill bill...\n"); + ack = 0; + break; + } + + case 'q': + { + pdb_process_query(ptr); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + { + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + if (hexToInt (&ptr, (int *)&addr)) + if (*(ptr++) == ',') + if (hexToInt (&ptr, &length)) + { + ptr = 0; + + pdb_page_fault_possible = 1; + pdb_page_fault = 0; + if (addr >= PAGE_OFFSET) + { + mem2hex ((char *) addr, pdb_out_buffer, length); + } + else if (pdb_ctx.process != -1) + { + pdb_linux_get_values(pdb_buffer, length, addr, + pdb_ctx.process, pdb_ctx.ptbr); + mem2hex (pdb_buffer, pdb_out_buffer, length); + } + else + { + pdb_get_values (pdb_buffer, length, + pdb_ctx.ptbr, addr); + mem2hex (pdb_buffer, pdb_out_buffer, length); + } + + pdb_page_fault_possible = 0; + if (pdb_page_fault) + { + strcpy (pdb_out_buffer, "E03"); + } + } + + if (ptr) + { + strcpy (pdb_out_buffer, "E01"); + } + break; + } + + /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ + case 'M': + { + /* TRY TO READ '%x,%x:'. 
IF SUCCEED, SET PTR = 0 */ + if (hexToInt (&ptr, (int *)&addr)) + if (*(ptr++) == ',') + if (hexToInt (&ptr, &length)) + if (*(ptr++) == ':') + { + + pdb_page_fault_possible = 1; + pdb_page_fault = 0; + if (addr >= PAGE_OFFSET) + { + hex2mem (ptr, (char *)addr, length); + pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); + } + else if (pdb_ctx.process != -1) + { + pdb_linux_set_values(ptr, length, addr, + pdb_ctx.process, + pdb_ctx.ptbr); + pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); + } + else + { + pdb_set_values (ptr, length, + pdb_ctx.ptbr, addr); + pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr); + } + pdb_page_fault_possible = 0; + if (pdb_page_fault) + { + strcpy (pdb_out_buffer, "E03"); + } + else + { + strcpy (pdb_out_buffer, "OK"); + } + + ptr = 0; + } + if (ptr) + { + strcpy (pdb_out_buffer, "E02"); + } + break; + } + case 'T': + { + int id; + + if (hexToInt (&ptr, &id)) + { + strcpy (pdb_out_buffer, "E00"); + +#ifdef PDB_PAST + + switch (pdb_level) /* previous level */ + { + case PDB_LVL_XEN: + { + struct task_struct *p; + id -= PDB_ID_OFFSET; + if ( (p = find_domain_by_id(id)) == NULL) + strcpy (pdb_out_buffer, "E00"); + else + strcpy (pdb_out_buffer, "OK"); + put_task_struct(p); + + pdb_level = PDB_LVL_GUESTOS; + pdb_ctx[pdb_level].ctrl = id; + pdb_ctx[pdb_level].info = id; + break; + } + case PDB_LVL_GUESTOS: + { + if (pdb_level == -1) + { + pdb_level = PDB_LVL_XEN; + } + else + { + pdb_level = PDB_LVL_PROCESS; + pdb_ctx[pdb_level].ctrl = id; + pdb_ctx[pdb_level].info = id; + } + break; + } + case PDB_LVL_PROCESS: + { + if (pdb_level == -1) + { + pdb_level = PDB_LVL_GUESTOS; + } + break; + } + default: + { + printk ("pdb internal error: invalid level [%d]\n", + pdb_level); + } + } + +#endif /* PDB_PAST */ + } + break; + } + } + +exit: + /* reply to the request */ + pdb_put_packet (pdb_out_buffer, ack); + + return go; +} + +/* + * process an input character from the serial line. 
+ * + * return "1" if the character is a gdb debug string + * (and hence shouldn't be further processed). + */ + +int pdb_debug_state = 0; /* small parser state machine */ + +int pdb_serial_input(u_char c, struct pt_regs *regs) +{ + int out = 1; + int loop, count; + unsigned long cr3; + + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + + switch (pdb_debug_state) + { + case 0: /* not currently processing debug string */ + if ( c == '$' ) /* start token */ + { + pdb_debug_state = 1; + pdb_in_buffer_ptr = 0; + pdb_in_checksum = 0; + pdb_xmit_checksum = 0; + } + else + { + out = 0; + } + break; + case 1: /* saw '$' */ + if ( c == '#' ) /* checksum token */ + { + pdb_debug_state = 2; + pdb_in_buffer[pdb_in_buffer_ptr] = 0; + } + else + { + pdb_in_checksum += c; + pdb_in_buffer[pdb_in_buffer_ptr++] = c; + } + break; + case 2: /* 1st checksum digit */ + pdb_xmit_checksum = hex(c) << 4; + pdb_debug_state = 3; + break; + case 3: /* 2nd checksum digit */ + pdb_xmit_checksum += hex(c); + if (pdb_in_checksum != pdb_xmit_checksum) + { + pdb_put_char('-'); /* checksum failure */ + printk ("checksum failure [%s.%02x.%02x]\n", pdb_in_buffer, + pdb_in_checksum, pdb_xmit_checksum); + } + else + { + pdb_put_char('+'); /* checksum okay */ + if ( pdb_in_buffer_ptr > 1 && pdb_in_buffer[2] == ':' ) + { + pdb_put_char(pdb_in_buffer[0]); + pdb_put_char(pdb_in_buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(pdb_in_buffer); + for (loop = 3; loop < count; loop++) + pdb_in_buffer[loop - 3] = pdb_in_buffer[loop]; + } + + pdb_process_command (pdb_in_buffer, regs, cr3, + PDB_LIVE_EXCEPTION); + } + pdb_debug_state = 0; + break; + } + + return out; +} + +int hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10); + if ((ch >= '0') && (ch <= '9')) return (ch-'0'); + if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10); + return (-1); +} + +/* convert the memory pointed to by mem into hex, placing result in buf */ +/* return a pointer to the last char put 
in buf (null) */ +char * +mem2hex (mem, buf, count) + char *mem; + char *buf; + int count; +{ + int i; + unsigned char ch; + + for (i = 0; i < count; i++) + { + ch = get_char (mem++); + *buf++ = hexchars[ch >> 4]; + *buf++ = hexchars[ch % 16]; + } + *buf = 0; + return (buf); +} + +/* convert the hex array pointed to by buf into binary to be placed in mem */ +/* return a pointer to the character AFTER the last byte written */ +char * +hex2mem (buf, mem, count) + char *buf; + char *mem; + int count; +{ + int i; + unsigned char ch; + + for (i = 0; i < count; i++) + { + ch = hex (*buf++) << 4; + ch = ch + hex (*buf++); + set_char (mem++, ch); + } + return (mem); +} + +int +hexToInt (char **ptr, int *intValue) +{ + int numChars = 0; + int hexValue; + int negative = 0; + + *intValue = 0; + + if (**ptr == '-') + { + negative = 1; + numChars++; + (*ptr)++; + } + + while (**ptr) + { + hexValue = hex (**ptr); + if (hexValue >= 0) + { + *intValue = (*intValue << 4) | hexValue; + numChars++; + } + else + break; + + (*ptr)++; + } + + if ( negative ) + *intValue *= -1; + + return (numChars); +} + +/***********************************************************************/ +/***********************************************************************/ + + +/* + * Add a breakpoint to the list of known breakpoints. + * For now there should only be two or three breakpoints so + * we use a simple linked list. In the future, maybe a red-black tree? + */ +struct pdb_breakpoint breakpoints; + +void pdb_bkpt_add (unsigned long cr3, unsigned long address) +{ + struct pdb_breakpoint *bkpt = kmalloc(sizeof(*bkpt), GFP_KERNEL); + bkpt->cr3 = cr3; + bkpt->address = address; + list_add(&bkpt->list, &breakpoints.list); +} + +/* + * Check to see if the breakpoint is in the list of known breakpoints + * Return a pointer to the breakpoint if it has been set, NULL otherwise. 
+ */ +struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3, + unsigned long address) +{ + struct list_head *list_entry; + struct pdb_breakpoint *bkpt; + + list_for_each(list_entry, &breakpoints.list) + { + bkpt = list_entry(list_entry, struct pdb_breakpoint, list); + if ( bkpt->cr3 == cr3 && bkpt->address == address ) + return bkpt; + } + + return NULL; +} + +/* + * Remove a breakpoint from the list of known breakpoints. + * Return 1 if the element was not found, otherwise 0. + */ +int pdb_bkpt_remove (unsigned long cr3, unsigned long address) +{ + struct list_head *list_entry; + struct pdb_breakpoint *bkpt; + + list_for_each(list_entry, &breakpoints.list) + { + bkpt = list_entry(list_entry, struct pdb_breakpoint, list); + if ( bkpt->cr3 == cr3 && bkpt->address == address ) + { + list_del(&bkpt->list); + kfree(bkpt); + return 0; + } + } + + return 1; +} + +/* + * Check to see if a memory write is really gdb setting a breakpoint + */ +void pdb_bkpt_check (u_char *buffer, int length, + unsigned long cr3, unsigned long addr) +{ + if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c') + { + /* inserting a new breakpoint */ + pdb_bkpt_add(cr3, addr); + TRC(printk("pdb breakpoint detected at 0x%lx:0x%lx\n", cr3, addr)); + } + else if ( pdb_bkpt_remove(cr3, addr) == 0 ) + { + /* removing a breakpoint */ + TRC(printk("pdb breakpoint cleared at 0x%lx:0x%lx\n", cr3, addr)); + } +} + +/***********************************************************************/ + +int pdb_change_values(u_char *buffer, int length, + unsigned long cr3, unsigned long addr, int rw); +int pdb_change_values_one_page(u_char *buffer, int length, + unsigned long cr3, unsigned long addr, int rw); + +#define __PDB_GET_VAL 1 +#define __PDB_SET_VAL 2 + +/* + * Set memory in a domain's address space + * Set "length" bytes at "address" from "domain" to the values in "buffer". + * Return the number of bytes set, 0 if there was a problem. 
+ */ + +int pdb_set_values(u_char *buffer, int length, + unsigned long cr3, unsigned long addr) +{ + int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL); + return count; +} + +/* + * Read memory from a domain's address space. + * Fetch "length" bytes at "address" from "domain" into "buffer". + * Return the number of bytes read, 0 if there was a problem. + */ + +int pdb_get_values(u_char *buffer, int length, + unsigned long cr3, unsigned long addr) +{ + return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL); +} + +/* + * Read or write memory in an address space + */ +int pdb_change_values(u_char *buffer, int length, + unsigned long cr3, unsigned long addr, int rw) +{ + int remaining; /* number of bytes to touch past this page */ + int bytes = 0; + + while ( (remaining = (addr + length - 1) - (addr | (PAGE_SIZE - 1))) > 0) + { + bytes += pdb_change_values_one_page(buffer, length - remaining, + cr3, addr, rw); + buffer = buffer + (2 * (length - remaining)); + length = remaining; + addr = (addr | (PAGE_SIZE - 1)) + 1; + } + + bytes += pdb_change_values_one_page(buffer, length, cr3, addr, rw); + return bytes; +} + +/* + * Change memory in a process' address space in one page + * Read or write "length" bytes at "address" into/from "buffer" + * from the virtual address space referenced by "cr3". + * Return the number of bytes read, 0 if there was a problem. 
+ */ + +int pdb_change_values_one_page(u_char *buffer, int length, + unsigned long cr3, unsigned long addr, int rw) +{ + l2_pgentry_t* l2_table = NULL; + l1_pgentry_t* l1_table = NULL; + u_char *page; + int bytes = 0; + + l2_table = map_domain_mem(cr3); + l2_table += l2_table_offset(addr); + if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT)) + { + if (pdb_page_fault_possible == 1) + { + pdb_page_fault = 1; + TRC(printk("pdb: L2 error (0x%lx)\n", addr)); + } + else + { + struct task_struct *p = find_domain_by_id(0); + printk ("pdb error: cr3: 0x%lx dom0cr3: 0x%lx\n", cr3, + p->mm.shadow_mode ? pagetable_val(p->mm.shadow_table) + : pagetable_val(p->mm.pagetable)); + put_task_struct(p); + printk ("pdb error: L2:0x%p (0x%lx)\n", + l2_table, l2_pgentry_val(*l2_table)); + } + goto exit2; + } + + if (l2_pgentry_val(*l2_table) & _PAGE_PSE) + { +#define PSE_PAGE_SHIFT L2_PAGETABLE_SHIFT +#define PSE_PAGE_SIZE (1UL << PSE_PAGE_SHIFT) +#define PSE_PAGE_MASK (~(PSE_PAGE_SIZE-1)) + +#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT ) + +#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK) + + page = map_domain_mem(pse_pgentry_to_phys(*l2_table) + /* 10 bits */ + (addr & L1_PAGE_BITS)); /* 10 bits */ + page += addr & (PAGE_SIZE - 1); /* 12 bits */ + } + else + { + l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table)); + l1_table += l1_table_offset(addr); + if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT)) + { + if (pdb_page_fault_possible == 1) + { + pdb_page_fault = 1; + TRC(printk ("pdb: L1 error (0x%lx)\n", addr)); + } + else + { + printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n", + l2_table, l2_pgentry_val(*l2_table), + l1_table, l1_pgentry_val(*l1_table)); + } + goto exit1; + } + + page = map_domain_mem(l1_pgentry_to_phys(*l1_table)); + page += addr & (PAGE_SIZE - 1); + } + + switch (rw) + { + case __PDB_GET_VAL: /* read */ + memcpy (buffer, page, length); + bytes = length; + break; + case __PDB_SET_VAL: /* write */ + hex2mem (buffer, 
page, length); + bytes = length; + break; + default: /* unknown */ + printk ("error: unknown RW flag: %d\n", rw); + return 0; + } + + unmap_domain_mem((void *)page); +exit1: + if (l1_table != NULL) + unmap_domain_mem((void *)l1_table); +exit2: + unmap_domain_mem((void *)l2_table); + + return bytes; +} + +/***********************************************************************/ + +void breakpoint(void); + +/* send the packet in buffer. */ +void pdb_put_packet (unsigned char *buffer, int ack) +{ + unsigned char checksum; + int count; + char ch; + + /* $# */ + /* do */ + { + pdb_put_char ('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) + { + pdb_put_char (ch); + checksum += ch; + count += 1; + } + + pdb_put_char('#'); + pdb_put_char(hexchars[checksum >> 4]); + pdb_put_char(hexchars[checksum % 16]); + } + + if (ack) + { + if ((ch = pdb_get_char()) != '+') + { + printk(" pdb return error: %c 0x%x [%s]\n", ch, ch, buffer); + } + } +} + +void pdb_get_packet(char *buffer) +{ + int count; + char ch; + unsigned char checksum = 0; + unsigned char xmitcsum = 0; + + do + { + while ((ch = pdb_get_char()) != '$'); + + count = 0; + checksum = 0; + + while (count < BUFMAX) + { + ch = pdb_get_char(); + if (ch == '#') break; + checksum += ch; + buffer[count] = ch; + count++; + } + buffer[count] = 0; + + if (ch == '#') + { + xmitcsum = hex(pdb_get_char()) << 4; + xmitcsum += hex(pdb_get_char()); + + if (xmitcsum == checksum) + { + pdb_put_char('+'); + if (buffer[2] == ':') + { + printk ("pdb: obsolete gdb packet (sequence ID)\n"); + } + } + else + { + pdb_put_char('-'); + } + } + } while (checksum != xmitcsum); + + return; +} + +/* + * process a machine interrupt or exception + * Return 1 if pdb is not interested in the exception; it should + * be propagated to the guest os. 
+ */ + +int pdb_handle_exception(int exceptionVector, + struct pt_regs *xen_regs) +{ + int signal = 0; + struct pdb_breakpoint* bkpt; + int watchdog_save; + unsigned long cr3; + + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + + /* If the exception is an int3 from user space then pdb is only + interested if it re-wrote an instruction set the breakpoint. + This occurs when leaving a system call from a domain. + */ + if ( exceptionVector == 3 && + (xen_regs->xcs & 3) == 3 && + xen_regs->eip != pdb_system_call_next_addr + 1) + { + TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%lx\n", + exceptionVector, xen_regs->xcs & 3, cr3, xen_regs->eip)); + return 1; + } + + /* + * If PDB didn't set the breakpoint, is not single stepping, + * is not entering a system call in a domain, + * the user didn't press the magic debug key, + * then we don't handle the exception. + */ + bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1); + if ( (bkpt == NULL) && + !pdb_stepping && + !pdb_system_call && + xen_regs->eip != pdb_system_call_next_addr + 1 && + (exceptionVector != KEYPRESS_EXCEPTION) && + xen_regs->eip < 0xc0000000) /* Linux-specific for now! */ + { + TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%lx\n", + exceptionVector, cr3, xen_regs->eip)); + return 1; + } + + printk("pdb_handle_exception [0x%x][0x%lx:0x%lx]\n", + exceptionVector, cr3, xen_regs->eip); + + if ( pdb_stepping ) + { + /* Stepped one instruction; now return to normal execution. 
*/ + xen_regs->eflags &= ~X86_EFLAGS_TF; + pdb_stepping = 0; + } + + if ( pdb_system_call ) + { + pdb_system_call = 0; + + pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx); + + /* we don't have a saved breakpoint so we need to rewind eip */ + xen_regs->eip--; + + /* if the user doesn't care about breaking when entering a + system call then we'll just ignore the exception */ + if ( (pdb_ctx.system_call & 0x01) == 0 ) + { + return 0; + } + } + + if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL) + { + /* Executed Int3: replace breakpoint byte with real program byte. */ + xen_regs->eip--; + } + + /* returning to user space after a system call */ + if ( xen_regs->eip == pdb_system_call_next_addr + 1) + { + u_char instr[2]; /* REALLY REALLY REALLY STUPID */ + + mem2hex (&pdb_system_call_leave_instr, instr, sizeof(instr)); + + pdb_linux_set_values (instr, 1, pdb_system_call_next_addr, + pdb_ctx.process, pdb_ctx.ptbr); + + pdb_system_call_next_addr = 0; + pdb_system_call_leave_instr = 0; + + /* manually rewind eip */ + xen_regs->eip--; + + /* if the user doesn't care about breaking when returning + to user space after a system call then we'll just ignore + the exception */ + if ( (pdb_ctx.system_call & 0x02) == 0 ) + { + return 0; + } + } + + /* Generate a signal for GDB. 
*/ + switch ( exceptionVector ) + { + case KEYPRESS_EXCEPTION: + signal = 2; break; /* SIGINT */ + case DEBUG_EXCEPTION: + signal = 5; break; /* SIGTRAP */ + case BREAKPT_EXCEPTION: + signal = 5; break; /* SIGTRAP */ + default: + printk("pdb: can't generate signal for unknown exception vector %d\n", + exceptionVector); + break; + } + + pdb_out_buffer[0] = 'S'; + pdb_out_buffer[1] = hexchars[signal >> 4]; + pdb_out_buffer[2] = hexchars[signal % 16]; + pdb_out_buffer[3] = 0; + pdb_put_packet(pdb_out_buffer, 1); + + watchdog_save = watchdog_on; + watchdog_on = 0; + + do { + pdb_out_buffer[0] = 0; + pdb_get_packet(pdb_in_buffer); + } + while ( pdb_process_command(pdb_in_buffer, xen_regs, cr3, signal) == 0 ); + + watchdog_on = watchdog_save; + + return 0; +} + +void pdb_key_pressed(u_char key, void *dev_id, struct pt_regs *regs) +{ + pdb_handle_exception(KEYPRESS_EXCEPTION, regs); + return; +} + +void initialize_pdb() +{ + extern char opt_pdb[]; + + /* Certain state must be initialised even when PDB will not be used. */ + memset((void *) &breakpoints, 0, sizeof(breakpoints)); + INIT_LIST_HEAD(&breakpoints.list); + pdb_stepping = 0; + + if ( strcmp(opt_pdb, "none") == 0 ) + return; + + if ( (pdb_serhnd = parse_serial_handle(opt_pdb)) == -1 ) + { + printk("error: failed to initialize PDB on port %s\n", opt_pdb); + return; + } + + pdb_ctx.valid = 1; + pdb_ctx.domain = -1; + pdb_ctx.process = -1; + pdb_ctx.system_call = 0; + pdb_ctx.ptbr = 0; + + printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n", + opt_pdb); + + /* Acknowledge any spurious GDB packets. 
*/ + pdb_put_char('+'); + + add_key_handler('D', pdb_key_pressed, "enter pervasive debugger"); + + pdb_initialized = 1; +} + +void breakpoint(void) +{ + if ( pdb_initialized ) + asm("int $3"); +} diff --git a/xen/arch/x86/process.c b/xen/arch/x86/process.c new file mode 100644 index 0000000000..7524e9c5c5 --- /dev/null +++ b/xen/arch/x86/process.c @@ -0,0 +1,364 @@ +/* + * linux/arch/i386/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int hlt_counter; + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +/* + * We use this if we don't have any better + * idle routine.. + */ +static void default_idle(void) +{ + if (!hlt_counter) { + __cli(); + if (!current->hyp_events && !softirq_pending(smp_processor_id())) + safe_halt(); + else + __sti(); + } +} + +void continue_cpu_idle_loop(void) +{ + int cpu = smp_processor_id(); + for ( ; ; ) + { + irq_stat[cpu].idle_timestamp = jiffies; + while (!current->hyp_events && !softirq_pending(cpu)) + default_idle(); + do_hyp_events(); + do_softirq(); + } +} + +void startup_cpu_idle_loop(void) +{ + /* Just some sanity to ensure that the scheduler is set up okay. */ + ASSERT(current->domain == IDLE_DOMAIN_ID); + (void)wake_up(current); + __enter_scheduler(); + + /* + * Declares CPU setup done to the boot processor. + * Therefore memory barrier to ensure state is visible. 
+ */ + smp_mb(); + init_idle(); + + continue_cpu_idle_loop(); +} + +static long no_idt[2]; +static int reboot_mode; +int reboot_thru_bios = 0; + +#ifdef CONFIG_SMP +int reboot_smp = 0; +static int reboot_cpu = -1; +/* shamelessly grabbed from lib/vsprintf.c for readability */ +#define is_digit(c) ((c) >= '0' && (c) <= '9') +#endif + + +static inline void kb_wait(void) +{ + int i; + + for (i=0; i<0x10000; i++) + if ((inb_p(0x64) & 0x02) == 0) + break; +} + + +void machine_restart(char * __unused) +{ + extern int opt_noreboot; +#ifdef CONFIG_SMP + int cpuid; +#endif + + if ( opt_noreboot ) + { + printk("Reboot disabled on cmdline: require manual reset\n"); + for ( ; ; ) __asm__ __volatile__ ("hlt"); + } + +#ifdef CONFIG_SMP + cpuid = GET_APIC_ID(apic_read(APIC_ID)); + + /* KAF: Need interrupts enabled for safe IPI. */ + __sti(); + + if (reboot_smp) { + + /* check to see if reboot_cpu is valid + if its not, default to the BSP */ + if ((reboot_cpu == -1) || + (reboot_cpu > (NR_CPUS -1)) || + !(phys_cpu_present_map & (1<shared_info->execution_context; + + /* + * Initial register values: + * DS,ES,FS,GS = FLAT_RING1_DS + * CS:EIP = FLAT_RING1_CS:start_pc + * SS:ESP = FLAT_RING1_DS:start_stack + * ESI = start_info + * [EAX,EBX,ECX,EDX,EDI,EBP are zero] + */ + ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS; + ec->cs = FLAT_RING1_CS; + ec->eip = start_pc; + ec->esp = start_stack; + ec->esi = start_info; + + __save_flags(ec->eflags); + ec->eflags |= X86_EFLAGS_IF; + + /* No fast trap at start of day. 
*/ + SET_DEFAULT_FAST_TRAP(&p->thread); +} + + +/* + * This special macro can be used to load a debugging register + */ +#define loaddebug(thread,register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (thread->debugreg[register])) + + +void switch_to(struct task_struct *prev_p, struct task_struct *next_p) +{ + struct thread_struct *next = &next_p->thread; + struct tss_struct *tss = init_tss + smp_processor_id(); + execution_context_t *stack_ec = get_execution_context(); + int i; + + __cli(); + + /* Switch guest general-register state. */ + if ( !is_idle_task(prev_p) ) + { + memcpy(&prev_p->shared_info->execution_context, + stack_ec, + sizeof(*stack_ec)); + unlazy_fpu(prev_p); + CLEAR_FAST_TRAP(&prev_p->thread); + } + + if ( !is_idle_task(next_p) ) + { + memcpy(stack_ec, + &next_p->shared_info->execution_context, + sizeof(*stack_ec)); + + /* + * This is sufficient! If the descriptor DPL differs from CS RPL then + * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared + * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP. + */ + if ( (stack_ec->cs & 3) == 0 ) + stack_ec->cs = FLAT_RING1_CS; + if ( (stack_ec->ss & 3) == 0 ) + stack_ec->ss = FLAT_RING1_DS; + + SET_FAST_TRAP(&next_p->thread); + + /* Switch the guest OS ring-1 stack. */ + tss->esp1 = next->guestos_sp; + tss->ss1 = next->guestos_ss; + + /* Maybe switch the debug registers. */ + if ( unlikely(next->debugreg[7]) ) + { + loaddebug(next, 0); + loaddebug(next, 1); + loaddebug(next, 2); + loaddebug(next, 3); + /* no 4 and 5 */ + loaddebug(next, 6); + loaddebug(next, 7); + } + + /* Switch page tables. */ + write_ptbase(&next_p->mm); + tlb_clocktick(); + } + + if ( unlikely(prev_p->io_bitmap != NULL) || + unlikely(next_p->io_bitmap != NULL) ) + { + if ( next_p->io_bitmap != NULL ) + { + /* Copy in the appropriate parts of the IO bitmap. We use the + * selector to copy only the interesting parts of the bitmap. 
*/ + + u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */ + + if ( prev_p->io_bitmap != NULL) + { + old_sel = prev_p->io_bitmap_sel; + + /* Replace any areas of the IO bitmap that had bits cleared. */ + for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + if ( !test_bit(i, &prev_p->io_bitmap_sel) ) + memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], + &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], + IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); + } + + /* Copy in any regions of the new task's bitmap that have bits + * clear and we haven't already dealt with. */ + for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + { + if ( test_bit(i, &old_sel) + && !test_bit(i, &next_p->io_bitmap_sel) ) + memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], + &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], + IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); + } + + tss->bitmap = IO_BITMAP_OFFSET; + + } + else + { + /* In this case, we're switching FROM a task with IO port access, + * to a task that doesn't use the IO bitmap. We set any TSS bits + * that might have been cleared, ready for future use. */ + for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + if ( !test_bit(i, &prev_p->io_bitmap_sel) ) + memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], + 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); + + /* + * a bitmap offset pointing outside of the TSS limit + * causes a nicely controllable SIGSEGV if a process + * tries to use a port IO instruction. The first + * sys_ioperm() call sets up the bitmap properly. + */ + tss->bitmap = INVALID_IO_BITMAP_OFFSET; + } + } + + set_current(next_p); + + /* Switch GDT and LDT. */ + __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt)); + load_LDT(next_p); + + __sti(); +} + + +/* XXX Currently the 'domain' field is ignored! 
XXX */ +long do_iopl(domid_t domain, unsigned int new_io_pl) +{ + execution_context_t *ec = get_execution_context(); + ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12); + return 0; +} diff --git a/xen/arch/x86/rwlock.c b/xen/arch/x86/rwlock.c new file mode 100644 index 0000000000..2ef7af16b1 --- /dev/null +++ b/xen/arch/x86/rwlock.c @@ -0,0 +1,28 @@ +#include +#include + +#if defined(CONFIG_SMP) +asm( +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n" +" " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n" +" cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n" +" jne 1b\n" +" " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n" +" jnz __write_lock_failed\n" +" ret\n" + +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n" +" lock ; incl (%eax)\n" +"1: rep; nop\n" +" cmpl $1,(%eax)\n" +" js 1b\n" +" lock ; decl (%eax)\n" +" js __read_lock_failed\n" +" ret\n" +); +#endif diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c new file mode 100644 index 0000000000..70610339b3 --- /dev/null +++ b/xen/arch/x86/setup.c @@ -0,0 +1,450 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char ignore_irq13; /* set if exception 16 works */ +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + +/* Lots of nice things, since we only target PPro+. 
*/ +unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE; +EXPORT_SYMBOL(mmu_cr4_features); + +unsigned long wait_init_idle; + +struct task_struct *idle_task[NR_CPUS] = { &idle0_task }; + +#ifdef CONFIG_ACPI_INTERPRETER +int acpi_disabled = 0; +#else +int acpi_disabled = 1; +#endif +EXPORT_SYMBOL(acpi_disabled); + +#ifdef CONFIG_ACPI_BOOT +extern int __initdata acpi_ht; +int acpi_force __initdata = 0; +#endif + +int phys_proc_id[NR_CPUS]; +int logical_proc_id[NR_CPUS]; + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __init have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else if (!strcmp(v, "CyrixInstead")) + c->x86_vendor = X86_VENDOR_CYRIX; + else if (!strcmp(v, "UMC UMC UMC ")) + c->x86_vendor = X86_VENDOR_UMC; + else if (!strcmp(v, "CentaurHauls")) + c->x86_vendor = X86_VENDOR_CENTAUR; + else if (!strcmp(v, "NexGenDriven")) + c->x86_vendor = X86_VENDOR_NEXGEN; + else if (!strcmp(v, "RiseRiseRise")) + c->x86_vendor = X86_VENDOR_RISE; + else if (!strcmp(v, "GenuineTMx86") || + !strcmp(v, "TransmetaCPU")) + c->x86_vendor = X86_VENDOR_TRANSMETA; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + extern int opt_noht, opt_noacpi; + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + 
clear_bit(X86_FEATURE_SEP, &c->x86_capability); + + if ( opt_noht ) + { + opt_noacpi = 1; /* Virtual CPUs only appear in ACPI tables. */ + clear_bit(X86_FEATURE_HT, &c->x86_capability[0]); + } + +#ifdef CONFIG_SMP + if ( test_bit(X86_FEATURE_HT, &c->x86_capability) ) + { + u32 eax, ebx, ecx, edx; + int initial_apic_id, siblings, cpu = smp_processor_id(); + + cpuid(1, &eax, &ebx, &ecx, &edx); + siblings = (ebx & 0xff0000) >> 16; + + if ( siblings <= 1 ) + { + printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu); + } + else if ( siblings > 2 ) + { + panic("We don't support more than two logical CPUs per package!"); + } + else + { + initial_apic_id = ebx >> 24 & 0xff; + phys_proc_id[cpu] = initial_apic_id >> 1; + logical_proc_id[cpu] = initial_apic_id & 1; + printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n", + cpu, phys_proc_id[cpu], logical_proc_id[cpu]); + } + } +#endif +} + +static void __init init_amd(struct cpuinfo_x86 *c) +{ + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + switch(c->x86) + { + case 5: + panic("AMD K6 is not supported.\n"); + case 6: /* An Athlon/Duron. We can trust the BIOS probably */ + break; + } +} + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +void __init identify_cpu(struct cpuinfo_x86 *c) +{ + int junk, i, cpu = smp_processor_id(); + u32 xlvl, tfms; + + phys_proc_id[cpu] = cpu; + logical_proc_id[cpu] = 0; + + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if ( !have_cpuid_p() ) + panic("Ancient processors not supported\n"); + + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + + if ( c->cpuid_level == 0 ) + panic("Decrepit CPUID not supported\n"); + + cpuid(0x00000001, &tfms, &junk, &junk, + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + c->x86_mask = tfms & 15; + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", + smp_processor_id(), + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + switch ( c->x86_vendor ) { + case X86_VENDOR_INTEL: + init_intel(c); + break; + case X86_VENDOR_AMD: + init_amd(c); + break; + case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */ + break; + case X86_VENDOR_CENTAUR: + break; + default: + printk("Unknown CPU identifier (%d): continuing anyway, " + "but might fail.\n", c->x86_vendor); + } + + printk("CPU caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. 
+ */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } +} + + +unsigned long cpu_initialized; +void __init cpu_init(void) +{ + int nr = smp_processor_id(); + struct tss_struct * t = &init_tss[nr]; + + if ( test_and_set_bit(nr, &cpu_initialized) ) + panic("CPU#%d already initialized!!!\n", nr); + printk("Initializing CPU#%d\n", nr); + + /* Set up GDT and IDT. */ + SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES); + SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS); + __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt)); + __asm__ __volatile__("lidt %0": "=m" (idt_descr)); + + /* No nested task. */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + + /* Ensure FPU gets initialised for each domain. */ + stts(); + + /* Set up and load the per-CPU TSS and LDT. */ + t->ss0 = __HYPERVISOR_DS; + t->esp0 = get_stack_top(); + set_tss_desc(nr,t); + load_TR(nr); + __asm__ __volatile__("lldt %%ax"::"a" (0)); + + /* Clear all 6 debug registers. */ +#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); + CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); +#undef CD + + /* Install correct page table. */ + write_ptbase(¤t->mm); + + init_idle_task(); +} + +static void __init do_initcalls(void) +{ + initcall_t *call; + for ( call = &__initcall_start; call < &__initcall_end; call++ ) + (*call)(); +} + +/* + * IBM-compatible BIOSes place drive info tables at initial interrupt + * vectors 0x41 and 0x46. These are in the for of 16-bit-mode far ptrs. 
+ */ +struct drive_info_struct { unsigned char dummy[32]; } drive_info; +void get_bios_driveinfo(void) +{ + unsigned long seg, off, tab1, tab2; + + off = (unsigned long)*(unsigned short *)(4*0x41+0); + seg = (unsigned long)*(unsigned short *)(4*0x41+2); + tab1 = (seg<<4) + off; + + off = (unsigned long)*(unsigned short *)(4*0x46+0); + seg = (unsigned long)*(unsigned short *)(4*0x46+2); + tab2 = (seg<<4) + off; + + printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n", + tab1, tab2); + + memcpy(drive_info.dummy+ 0, (char *)tab1, 16); + memcpy(drive_info.dummy+16, (char *)tab2, 16); +} + + +unsigned long pci_mem_start = 0x10000000; + +void __init start_of_day(void) +{ + extern void trap_init(void); + extern void init_IRQ(void); + extern void time_init(void); + extern void timer_bh(void); + extern void init_timervecs(void); + extern void ac_timer_init(void); + extern void initialize_keytable(); + extern void initialize_keyboard(void); + extern int opt_nosmp, opt_watchdog, opt_noacpi, opt_ignorebiostables; + extern int do_timer_lists_from_pit; + unsigned long low_mem_size; + +#ifdef MEMORY_GUARD + /* Unmap the first page of CPU0's stack. */ + extern unsigned long cpu0_stack[]; + memguard_guard_range(cpu0_stack, PAGE_SIZE); +#endif + + open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, + (void *)new_tlbflush_clock_period, + NULL); + + if ( opt_watchdog ) + nmi_watchdog = NMI_LOCAL_APIC; + + /* + * We do this early, but tables are in the lowest 1MB (usually + * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered. + */ + get_bios_driveinfo(); + + /* Tell the PCI layer not to allocate too close to the RAM area.. 
*/ + low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff; + if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size; + + identify_cpu(&boot_cpu_data); /* get CPU type info */ + if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR); + if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT); +#ifdef CONFIG_SMP + if ( opt_ignorebiostables ) + { + opt_nosmp = 1; /* No SMP without configuration */ + opt_noacpi = 1; /* ACPI will just confuse matters also */ + } + else + { + find_smp_config(); + smp_alloc_memory(); /* trampoline which other CPUs jump at */ + } +#endif + paging_init(); /* not much here now, but sets up fixmap */ + if ( !opt_noacpi ) + acpi_boot_init(); +#ifdef CONFIG_SMP + if ( smp_found_config ) + get_smp_config(); +#endif + domain_init(); + scheduler_init(); + trap_init(); + init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */ + time_init(); /* installs software handler for HZ clock. */ + softirq_init(); + init_timervecs(); + init_bh(TIMER_BH, timer_bh); + init_apic_mappings(); /* make APICs addressable in our pagetables. */ + +#ifndef CONFIG_SMP + APIC_init_uniprocessor(); +#else + if ( opt_nosmp ) + APIC_init_uniprocessor(); + else + smp_boot_cpus(); + /* + * Does loads of stuff, including kicking the local + * APIC, and the IO APIC after other CPUs are booted. + * Each IRQ is preferably handled by IO-APIC, but + * fall thru to 8259A if we have to (but slower). 
+ */ +#endif + + __sti(); + + initialize_keytable(); /* call back handling for key codes */ + + serial_init_stage2(); + initialize_keyboard(); /* setup keyboard (also for debugging) */ + +#ifdef XEN_DEBUGGER + initialize_pdb(); /* pervasive debugger */ +#endif + + if ( !cpu_has_apic ) + { + do_timer_lists_from_pit = 1; + if ( smp_num_cpus != 1 ) + panic("We need local APICs on SMP machines!"); + } + + ac_timer_init(); /* init accurate timers */ + init_xen_time(); /* initialise the time */ + schedulers_start(); /* start scheduler for each CPU */ + + check_nmi_watchdog(); + +#ifdef CONFIG_PCI + pci_init(); +#endif + do_initcalls(); + +#ifdef CONFIG_SMP + wait_init_idle = cpu_online_map; + clear_bit(smp_processor_id(), &wait_init_idle); + smp_threads_ready = 1; + smp_commence(); /* Tell other CPUs that state of the world is stable. */ + while (wait_init_idle) + { + cpu_relax(); + barrier(); + } +#endif + + watchdog_on = 1; +} diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c new file mode 100644 index 0000000000..607c083946 --- /dev/null +++ b/xen/arch/x86/smp.c @@ -0,0 +1,442 @@ +/* + * Intel SMP support routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * + * This code is released under the GNU General Public License version 2 or + * later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +/* + * Some notes on x86 processor bugs affecting SMP operation: + * + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. + * The Linux implications for SMP are handled as follows: + * + * Pentium III / [Xeon] + * None of the E1AP-E3AP errata are visible to the user. + * + * E1AP. see PII A1AP + * E2AP. see PII A2AP + * E3AP. see PII A3AP + * + * Pentium II / [Xeon] + * None of the A1AP-A3AP errata are visible to the user. + * + * A1AP. see PPro 1AP + * A2AP. see PPro 2AP + * A3AP. 
see PPro 7AP + * + * Pentium Pro + * None of 1AP-9AP errata are visible to the normal user, + * except occasional delivery of 'spurious interrupt' as trap #15. + * This is very rare and a non-problem. + * + * 1AP. Linux maps APIC as non-cacheable + * 2AP. worked around in hardware + * 3AP. fixed in C0 and above steppings microcode update. + * Linux does not use excessive STARTUP_IPIs. + * 4AP. worked around in hardware + * 5AP. symmetric IO mode (normal Linux operation) not affected. + * 'noapic' mode has vector 0xf filled out properly. + * 6AP. 'noapic' mode might be affected - fixed in later steppings + * 7AP. We do not assume writes to the LVT deassering IRQs + * 8AP. We do not enable low power mode (deep sleep) during MP bootup + * 9AP. We do not use mixed mode + */ + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline int __prepare_ICR (unsigned int shortcut, int vector) +{ + return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; +} + +static inline int __prepare_ICR2 (unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __send_IPI_shortcut(unsigned int shortcut, int vector) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. 
+ */ + apic_write_around(APIC_ICR, cfg); +} + +void send_IPI_self(int vector) +{ + __send_IPI_shortcut(APIC_DEST_SELF, vector); +} + +static inline void send_IPI_mask(int mask, int vector) +{ + unsigned long cfg; + unsigned long flags; + + __save_flags(flags); + __cli(); + + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + + __restore_flags(flags); +} + +static inline void send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then + * we get an APIC send error if we try to broadcast. + * thus we have to avoid sending IPIs in this case. + */ + if (!(smp_num_cpus > 1)) + return; + + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +} + +/* + * ********* XEN NOTICE ********** + * I've left the following comments lying around as they look liek they might + * be useful to get multiprocessor guest OSes going. However, I suspect the + * issues we face will be quite different so I've ripped out all the + * TLBSTATE logic (I didn't understand it anyway :-). These comments do + * not apply to Xen, therefore! -- Keir (8th Oct 2003). + */ +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask); + * Stop ipi delivery for the old mm. 
This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superflous + * tlb flush. + * 1a2) set cpu_tlbstate to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu_tlbstate[].active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. + * 1b) thread switch without mm change + * cpu_tlbstate[].active_mm is correct, cpu0 already handles + * flush ipis. + * 1b1) set cpu_tlbstate to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu_tlbstate is local to each cpu, no + * write/read ordering problems. + * + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. 
+ */ + +static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; +volatile unsigned long flush_cpumask; + +asmlinkage void smp_invalidate_interrupt(void) +{ + ack_APIC_irq(); + perfc_incrc(ipis); + if ( likely(test_and_clear_bit(smp_processor_id(), &flush_cpumask)) ) + local_flush_tlb(); +} + +void flush_tlb_mask(unsigned long mask) +{ + ASSERT(!in_irq()); + + if ( mask & (1 << smp_processor_id()) ) + { + local_flush_tlb(); + mask &= ~(1 << smp_processor_id()); + } + + if ( mask != 0 ) + { + /* + * We are certainly not reentering a flush_lock region on this CPU + * because we are not in an IRQ context. We can therefore wait for the + * other guy to release the lock. This is harder than it sounds because + * local interrupts might be disabled, and he may be waiting for us to + * execute smp_invalidate_interrupt(). We deal with this possibility by + * inlining the meat of that function here. + */ + while ( unlikely(!spin_trylock(&flush_lock)) ) + { + if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) ) + local_flush_tlb(); + rep_nop(); + } + + flush_cpumask = mask; + send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); + while ( flush_cpumask != 0 ) + { + rep_nop(); + barrier(); + } + + spin_unlock(&flush_lock); + } +} + +/* + * NB. Must be called with no locks held and interrupts enabled. + * (e.g., softirq context). + */ +void new_tlbflush_clock_period(void) +{ + spin_lock(&flush_lock); + + /* Someone may acquire the lock and execute the flush before us. */ + if ( ((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) != 0 ) + goto out; + + if ( smp_num_cpus > 1 ) + { + /* Flush everyone else. We definitely flushed just before entry. */ + flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id()); + send_IPI_allbutself(INVALIDATE_TLB_VECTOR); + while ( flush_cpumask != 0 ) + { + rep_nop(); + barrier(); + } + } + + /* No need for atomicity: we are the only possible updater. 
*/ + tlbflush_clock++; + + out: + spin_unlock(&flush_lock); +} + +static void flush_tlb_all_pge_ipi(void* info) +{ + __flush_tlb_pge(); +} + +void flush_tlb_all_pge(void) +{ + smp_call_function (flush_tlb_all_pge_ipi,0,1,1); + __flush_tlb_pge(); +} + +void smp_send_event_check_mask(unsigned long cpu_mask) +{ + send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + */ +static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static struct call_data_struct * call_data; + +/* + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. + */ + +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) +/* + * [SUMMARY] Run a function on all other CPUs. + * The function to run. This must be fast and non-blocking. + * An arbitrary pointer to pass to the function. + * currently unused. + * If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, or bottom halfs. 
+ */ +{ + struct call_data_struct data; + int cpus = smp_num_cpus-1; + + if (!cpus) + return 0; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + ASSERT(local_irq_is_enabled()); + + spin_lock(&call_lock); + + call_data = &data; + wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + barrier(); + + spin_unlock(&call_lock); + + return 0; +} + +static void stop_this_cpu (void * dummy) +{ + /* + * Remove this CPU: + */ + clear_bit(smp_processor_id(), &cpu_online_map); + __cli(); + disable_local_APIC(); + for(;;) __asm__("hlt"); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); + smp_num_cpus = 1; + + __cli(); + disable_local_APIC(); + __sti(); +} + +/* + * Nothing to do, as all the work is done automatically when + * we return from the interrupt. 
+ */ +asmlinkage void smp_event_check_interrupt(void) +{ + ack_APIC_irq(); + perfc_incrc(ipis); +} + +asmlinkage void smp_call_function_interrupt(void) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + ack_APIC_irq(); + perfc_incrc(ipis); + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + (*func)(info); + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } +} + +#endif /* CONFIG_SMP */ diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c new file mode 100644 index 0000000000..635bc55e02 --- /dev/null +++ b/xen/arch/x86/smpboot.c @@ -0,0 +1,950 @@ +/* + * x86 SMP booting functions + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * Thanks to Intel for making available several different Pentium, + * Pentium Pro and Pentium-II/Xeon MP machines. + * Original development of Linux SMP code supported by Caldera. + * + * This code is released under the GNU General Public License version 2 or + * later. + * + * Fixes + * Felix Koop : NR_CPUS used properly + * Jose Renau : Handle single CPU case. + * Alan Cox : By repeated request 8) - Total BogoMIP report. + * Greg Wright : Fix for kernel stacks panic. + * Erich Boleyn : MP v1.4 and additional changes. + * Matthias Sattler : Changes for 2.1 kernel map. + * Michel Lespinasse : Changes for 2.1 kernel map. + * Michael Chastain : Change trampoline.S to gnu as. + * Alan Cox : Dumb bug: 'B' step PPro's are fine + * Ingo Molnar : Added APIC timers, based on code + * from Jose Renau + * Ingo Molnar : various cleanups and rewrites + * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. 
Rozycki : Bits for genuine 82489DX APICs + * Martin J. Bligh : Added support for multi-quad systems + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +/* Set if we find a B stepping CPU */ +static int smp_b_stepping; + +/* Setup configured maximum number of CPUs to activate */ +static int max_cpus = -1; + +/* Total count of live CPUs */ +int smp_num_cpus = 1; + +/* Bitmask of currently online CPUs */ +unsigned long cpu_online_map; + +static volatile unsigned long cpu_callin_map; +static volatile unsigned long cpu_callout_map; + +/* Per CPU bogomips and other parameters */ +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; + +/* Set when the idlers are all forked */ +int smp_threads_ready; + +/* + * Trampoline 80x86 program as an array. + */ + +extern unsigned char trampoline_data []; +extern unsigned char trampoline_end []; +static unsigned char *trampoline_base; + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. + */ + +static unsigned long __init setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} + +/* + * We are called very early to get the low memory for the + * SMP bootup trampoline page. + */ +void __init smp_alloc_memory(void) +{ + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + trampoline_base = __va(0x90000); +} + +/* + * The bootstrap kernel entry code has set these up. 
Save them for + * a given CPU + */ + +void __init smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c = cpu_data + id; + + *c = boot_cpu_data; + c->pte_quick = 0; + c->pmd_quick = 0; + c->pgd_quick = 0; + c->pgtable_cache_sz = 0; + identify_cpu(c); + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; +} + +/* + * Architecture specific routine called by the kernel just before init is + * fired off. This allows the BP to have everything in order [we hope]. + * At the end of this all the APs will hit the system scheduling and off + * we go. Each AP will load the system gdt's and jump through the kernel + * init into idle(). At this point the scheduler will one day take over + * and give them jobs to do. smp_callin is a standard routine + * we use to track CPUs as they power up. + */ + +static atomic_t smp_commenced = ATOMIC_INIT(0); + +void __init smp_commence(void) +{ + /* + * Lets the callins below out of their loop. + */ + Dprintk("Setting commenced=1, go go go\n"); + + wmb(); + atomic_set(&smp_commenced,1); +} + +/* + * TSC synchronization. + * + * We first check wether all CPUs have their TSC's synchronized, + * then we print a warning if not, and always resync. + */ + +static atomic_t tsc_start_flag = ATOMIC_INIT(0); +static atomic_t tsc_count_start = ATOMIC_INIT(0); +static atomic_t tsc_count_stop = ATOMIC_INIT(0); +static unsigned long long tsc_values[NR_CPUS]; + +#define NR_LOOPS 5 + +/* + * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit + * multiplication. Not terribly optimized but we need it at boot time only + * anyway. 
+ * + * result == a / b + * == (a1 + a2*(2^32)) / b + * == a1/b + a2*(2^32/b) + * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b + * ^---- (this multiplication can overflow) + */ + +static unsigned long long div64 (unsigned long long a, unsigned long b0) +{ + unsigned int a1, a2; + unsigned long long res; + + a1 = ((unsigned int*)&a)[0]; + a2 = ((unsigned int*)&a)[1]; + + res = a1/b0 + + (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) + + a2 / b0 + + (a2 * (0xffffffff % b0)) / b0; + + return res; +} + +static void __init synchronize_tsc_bp (void) +{ + int i; + unsigned long long t0; + unsigned long long sum, avg; + long long delta; + int buggy = 0; + + printk("checking TSC synchronization across CPUs: "); + + atomic_set(&tsc_start_flag, 1); + wmb(); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronized and + * the BP and APs set their cycle counters to zero all at + * once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. 
+ */ + for (i = 0; i < NR_LOOPS; i++) { + /* + * all APs synchronize but they loop on '== num_cpus' + */ + while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_stop, 0); + wmb(); + /* + * this lets the APs save their current TSC: + */ + atomic_inc(&tsc_count_start); + + rdtscll(tsc_values[smp_processor_id()]); + /* + * We clear the TSC in the last loop: + */ + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + /* + * Wait for all APs to leave the synchronization point: + */ + while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_start, 0); + wmb(); + atomic_inc(&tsc_count_stop); + } + + sum = 0; + for (i = 0; i < smp_num_cpus; i++) { + t0 = tsc_values[i]; + sum += t0; + } + avg = div64(sum, smp_num_cpus); + + sum = 0; + for (i = 0; i < smp_num_cpus; i++) { + delta = tsc_values[i] - avg; + if (delta < 0) + delta = -delta; + /* + * We report bigger than 2 microseconds clock differences. + */ + if (delta > 2*ticks_per_usec) { + long realdelta; + if (!buggy) { + buggy = 1; + printk("\n"); + } + realdelta = div64(delta, ticks_per_usec); + if (tsc_values[i] < avg) + realdelta = -realdelta; + + printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! 
FIXED.\n", + i, realdelta); + } + + sum += delta; + } + if (!buggy) + printk("passed.\n"); +} + +static void __init synchronize_tsc_ap (void) +{ + int i; + + /* + * smp_num_cpus is not necessarily known at the time + * this gets called, so we first wait for the BP to + * finish SMP initialization: + */ + while (!atomic_read(&tsc_start_flag)) mb(); + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&tsc_count_start); + while (atomic_read(&tsc_count_start) != smp_num_cpus) mb(); + + rdtscll(tsc_values[smp_processor_id()]); + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb(); + } +} +#undef NR_LOOPS + +static atomic_t init_deasserted; + +void __init smp_callin(void) +{ + int cpuid, phys_id, i; + + /* + * If waken up by an INIT in an 82489DX configuration + * we may get here before an INIT-deassert IPI reaches + * our local APIC. We have to wait for the IPI or we'll + * lock up on an APIC access. + */ + while (!atomic_read(&init_deasserted)); + + /* + * (This works even if the APIC is not enabled.) + */ + phys_id = GET_APIC_ID(apic_read(APIC_ID)); + cpuid = smp_processor_id(); + if (test_and_set_bit(cpuid, &cpu_online_map)) { + printk("huh, phys CPU#%d, CPU#%d already present??\n", + phys_id, cpuid); + BUG(); + } + Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); + + /* + * STARTUP IPIs are fragile beasts as they might sometimes + * trigger some glue motherboard logic. Complete APIC bus + * silence for 1 second, this overestimates the time the + * boot CPU is spending to send the up to 2 STARTUP IPIs + * by a factor of two. This should be enough. 
+ */ + + for ( i = 0; i < 200; i++ ) + { + if ( test_bit(cpuid, &cpu_callout_map) ) break; + mdelay(10); + } + + if (!test_bit(cpuid, &cpu_callout_map)) { + printk("BUG: CPU%d started up but did not get a callout!\n", + cpuid); + BUG(); + } + + /* + * the boot CPU has finished the init stage and is spinning + * on callin_map until we finish. We are free to set up this + * CPU, first the APIC. (this is probably redundant on most + * boards) + */ + + Dprintk("CALLIN, before setup_local_APIC().\n"); + + setup_local_APIC(); + + __sti(); + +#ifdef CONFIG_MTRR + /* + * Must be done before calibration delay is computed + */ + mtrr_init_secondary_cpu (); +#endif + + Dprintk("Stack at about %p\n",&cpuid); + + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + + if (nmi_watchdog == NMI_LOCAL_APIC) + setup_apic_nmi_watchdog(); + + /* + * Allow the master to continue. + */ + set_bit(cpuid, &cpu_callin_map); + + /* + * Synchronize the TSC with the BP + */ + synchronize_tsc_ap(); +} + +static int cpucount; + +/* + * Activate a secondary processor. + */ +void __init start_secondary(void) +{ + unsigned int cpu = cpucount; + /* 6 bytes suitable for passing to LIDT instruction. */ + unsigned char idt_load[6]; + + extern void cpu_init(void); + + set_current(idle_task[cpu]); + + /* + * Dont put anything before smp_callin(), SMP + * booting is too fragile that we want to limit the + * things done here to the most necessary things. + */ + cpu_init(); + smp_callin(); + + while (!atomic_read(&smp_commenced)) + rep_nop(); + + /* + * At this point, boot CPU has fully initialised the IDT. It is + * now safe to make ourselves a private copy. 
+ */ + idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL); + memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8); + *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1; + *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu]; + __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); + + /* + * low-memory mappings have been cleared, flush them from the local TLBs + * too. + */ + local_flush_tlb(); + + startup_cpu_idle_loop(); + + BUG(); +} + +extern struct { + unsigned long esp, ss; +} stack_start; + +/* which physical APIC ID maps to which logical CPU number */ +volatile int physical_apicid_2_cpu[MAX_APICID]; +/* which logical CPU number maps to which physical APIC ID */ +volatile int cpu_2_physical_apicid[NR_CPUS]; + +/* which logical APIC ID maps to which logical CPU number */ +volatile int logical_apicid_2_cpu[MAX_APICID]; +/* which logical CPU number maps to which logical APIC ID */ +volatile int cpu_2_logical_apicid[NR_CPUS]; + +static inline void init_cpu_to_apicid(void) +/* Initialize all maps between cpu number and apicids */ +{ + int apicid, cpu; + + for (apicid = 0; apicid < MAX_APICID; apicid++) { + physical_apicid_2_cpu[apicid] = -1; + logical_apicid_2_cpu[apicid] = -1; + } + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpu_2_physical_apicid[cpu] = -1; + cpu_2_logical_apicid[cpu] = -1; + } +} + +static inline void map_cpu_to_boot_apicid(int cpu, int apicid) +/* + * set up a mapping between cpu and apicid. Uses logical apicids for multiquad, + * else physical apic ids + */ +{ + physical_apicid_2_cpu[apicid] = cpu; + cpu_2_physical_apicid[cpu] = apicid; +} + +static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid) +/* + * undo a mapping between cpu and apicid. 
Uses logical apicids for multiquad, + * else physical apic ids + */ +{ + physical_apicid_2_cpu[apicid] = -1; + cpu_2_physical_apicid[cpu] = -1; +} + +#if APIC_DEBUG +static inline void inquire_remote_apic(int apicid) +{ + int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + char *names[] = { "ID", "VERSION", "SPIV" }; + int timeout, status; + + printk("Inquiring remote APIC #%d...\n", apicid); + + for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + printk("... APIC #%d %s: ", apicid, names[i]); + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + + timeout = 0; + do { + udelay(100); + status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; + } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); + + switch (status) { + case APIC_ICR_RR_VALID: + status = apic_read(APIC_RRR); + printk("%08x\n", status); + break; + default: + printk("failed\n"); + } + } +} +#endif + + +static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip) +{ + unsigned long send_status = 0, accept_status = 0; + int maxlvt, timeout, num_starts, j; + + Dprintk("Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* + * Send IPI + */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + mdelay(10); + + Dprintk("Deasserting INIT.\n"); + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Send IPI */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = 
apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + atomic_set(&init_deasserted, 1); + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't send the STARTUP IPIs. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) + num_starts = 2; + else + num_starts = 0; + + /* + * Run STARTUP IPI loop. + */ + Dprintk("#startup loops: %d.\n", num_starts); + + maxlvt = get_maxlvt(); + + for (j = 1; j <= num_starts; j++) { + Dprintk("Sending STARTUP #%d.\n",j); + + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + Dprintk("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(300); + + Dprintk("Startup point 1.\n"); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + Dprintk("After Startup.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} + +extern unsigned long cpu_initialized; + +static void __init do_boot_cpu (int apicid) +/* + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad + * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
+ */ +{ + struct task_struct *idle; + unsigned long boot_error = 0; + int timeout, cpu; + unsigned long start_eip, stack; + + cpu = ++cpucount; + + if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL ) + panic("failed 'createdomain' for CPU %d", cpu); + + set_bit(PF_IDLETASK, &idle->flags); + + idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table)); + + map_cpu_to_boot_apicid(cpu, apicid); + + SET_DEFAULT_FAST_TRAP(&idle->thread); + + idle_task[cpu] = idle; + + /* start_eip had better be page-aligned! */ + start_eip = setup_trampoline(); + + /* So we see what's up. */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); + + stack = __pa(__get_free_pages(GFP_KERNEL, 1)); + stack_start.esp = stack + STACK_SIZE - STACK_RESERVED; + + /* Debug build: detect stack overflow by setting up a guard page. */ + memguard_guard_range(__va(stack), PAGE_SIZE); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + atomic_set(&init_deasserted, 0); + + Dprintk("Setting warm reset code and vector.\n"); + + CMOS_WRITE(0xa, 0xf); + local_flush_tlb(); + Dprintk("1.\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + Dprintk("2.\n"); + *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + Dprintk("3.\n"); + + /* + * Be paranoid about clearing APIC errors. + */ + if ( APIC_INTEGRATED(apic_version[apicid]) ) + { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } + + /* + * Status is now clean + */ + boot_error = 0; + + /* + * Starting actual IPI sequence... + */ + + boot_error = wakeup_secondary_via_INIT(apicid, start_eip); + + if (!boot_error) { + /* + * allow APs to start initializing. 
+ */ + Dprintk("Before Callout %d.\n", cpu); + set_bit(cpu, &cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (test_bit(cpu, &cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (test_bit(cpu, &cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + printk("CPU%d has booted.\n", cpu); + } else { + boot_error= 1; + if (*((volatile unsigned long *)phys_to_virt(start_eip)) + == 0xA5A5A5A5) + /* trampoline started but...? */ + printk("Stuck ??\n"); + else + /* trampoline code not run */ + printk("Not responding.\n"); +#if APIC_DEBUG + inquire_remote_apic(apicid); +#endif + } + } + if (boot_error) { + /* Try to put things back the way they were before ... */ + unmap_cpu_to_boot_apicid(cpu, apicid); + clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */ + clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ + clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */ + cpucount--; + } +} + + +/* + * Cycle through the processors sending APIC IPIs to boot each. + */ + +static int boot_cpu_logical_apicid; +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio = NULL; + +void __init smp_boot_cpus(void) +{ + int apicid, bit; + +#ifdef CONFIG_MTRR + /* Must be done before other processors booted */ + mtrr_init_boot_cpu (); +#endif + /* Initialize the logical to physical CPU number mapping */ + init_cpu_to_apicid(); + + /* + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ + printk("CPU%d booted\n", 0); + + /* + * We have the boot CPU online for sure. + */ + set_bit(0, &cpu_online_map); + boot_cpu_logical_apicid = logical_smp_processor_id(); + map_cpu_to_boot_apicid(0, boot_cpu_apicid); + + /* + * If we couldnt find an SMP configuration at boot time, + * get out of here now! 
+ */ + if (!smp_found_config) { + printk("SMP motherboard not detected.\n"); + io_apic_irqs = 0; + cpu_online_map = phys_cpu_present_map = 1; + smp_num_cpus = 1; + if (APIC_init_uniprocessor()) + printk("Local APIC not detected." + " Using dummy APIC emulation.\n"); + goto smp_done; + } + + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + */ + if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + phys_cpu_present_map |= (1 << hard_smp_processor_id()); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && + !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) { + printk("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + io_apic_irqs = 0; + cpu_online_map = phys_cpu_present_map = 1; + smp_num_cpus = 1; + goto smp_done; + } + + verify_local_APIC(); + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + smp_found_config = 0; + printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + io_apic_irqs = 0; + cpu_online_map = phys_cpu_present_map = 1; + smp_num_cpus = 1; + goto smp_done; + } + + connect_bsp_APIC(); + setup_local_APIC(); + + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) + BUG(); + + /* + * Scan the CPU present map and fire up the other CPUs via do_boot_cpu + * + * In clustered apic mode, phys_cpu_present_map is a constructed thus: + * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the + * clustered apic ID. 
+ */ + Dprintk("CPU present map: %lx\n", phys_cpu_present_map); + + for (bit = 0; bit < NR_CPUS; bit++) { + apicid = cpu_present_to_apicid(bit); + /* + * Don't even attempt to start the boot CPU! + */ + if (apicid == boot_cpu_apicid) + continue; + + if (!(phys_cpu_present_map & (1 << bit))) + continue; + if ((max_cpus >= 0) && (max_cpus <= cpucount+1)) + continue; + + do_boot_cpu(apicid); + + /* + * Make sure we unmap all failed CPUs + */ + if ((boot_apicid_to_cpu(apicid) == -1) && + (phys_cpu_present_map & (1 << bit))) + printk("CPU #%d not responding - cannot use it.\n", + apicid); + } + + /* + * Cleanup possible dangling ends... + */ + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + CMOS_WRITE(0, 0xf); + + *((volatile long *) phys_to_virt(0x467)) = 0; + + if (!cpucount) { + printk("Error: only one processor found.\n"); + } else { + printk("Total of %d processors activated.\n", cpucount+1); + } + smp_num_cpus = cpucount + 1; + + if (smp_b_stepping) + printk("WARNING: SMP operation may" + " be unreliable with B stepping processors.\n"); + Dprintk("Boot done.\n"); + + /* + * Here we can be sure that there is an IO-APIC in the system. Let's + * go and set it up: + */ + if ( nr_ioapics ) setup_IO_APIC(); + + /* Set up all local APIC timers in the system. */ + setup_APIC_clocks(); + + /* Synchronize the TSC with the AP(s). 
*/ + if ( cpucount ) synchronize_tsc_bp(); + + smp_done: + ; +} + +#endif /* CONFIG_SMP */ diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c new file mode 100644 index 0000000000..9cd6da1955 --- /dev/null +++ b/xen/arch/x86/time.c @@ -0,0 +1,386 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2002-2003 University of Cambridge + **************************************************************************** + * + * File: i386/time.c + * Author: Rolf Neugebar & Keir Fraser + * + * Environment: Xen Hypervisor + * Description: modified version of Linux' time.c + * implements system and wall clock time. + * based on freebsd's implementation. + */ + +/* + * linux/arch/i386/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +extern rwlock_t xtime_lock; +extern unsigned long wall_jiffies; + +/* GLOBAL */ +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +unsigned long ticks_per_usec; /* TSC ticks per microsecond. */ +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +int timer_ack = 0; +int do_timer_lists_from_pit = 0; + +/* PRIVATE */ +static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? 
*/ +static u64 cpu_freq; /* CPU frequency (Hz) */ +static u32 st_scale_f; /* Cycles -> ns, fractional part */ +static u32 st_scale_i; /* Cycles -> ns, integer part */ +static u32 tsc_irq; /* CPU0's TSC at last 'time update' */ +static s_time_t stime_irq; /* System time at last 'time update' */ + +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + u64 full_tsc; + + write_lock(&xtime_lock); + +#ifdef CONFIG_X86_IO_APIC + if ( timer_ack ) + { + extern spinlock_t i8259A_lock; + spin_lock(&i8259A_lock); + outb(0x0c, 0x20); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(0x20); + spin_unlock(&i8259A_lock); + } +#endif + + /* + * Updates TSC timestamp (used to interpolate passage of time between + * interrupts). + */ + rdtscll(full_tsc); + tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); + + /* Updates xtime (wallclock time). */ + do_timer(regs); + + /* Updates system time (nanoseconds since boot). */ + stime_irq += MILLISECS(1000/HZ); + + write_unlock(&xtime_lock); + + /* Rough hack to allow accurate timers to sort-of-work with no APIC. */ + if ( do_timer_lists_from_pit ) + __cpu_raise_softirq(smp_processor_id(), AC_TIMER_SOFTIRQ); +} + +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, + "timer", NULL, NULL}; + +/* ------ Calibrate the TSC ------- + * Return processor ticks per second / CALIBRATE_FRAC. 
+ */ + +#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */ +#define CALIBRATE_FRAC 20 /* calibrate over 50ms */ +#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC) + +static unsigned long __init calibrate_tsc(void) +{ + unsigned long startlow, starthigh, endlow, endhigh, count; + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on + * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB) + * to begin countdown. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ + + rdtsc(startlow, starthigh); + for ( count = 0; (inb(0x61) & 0x20) == 0; count++ ) + continue; + rdtsc(endlow, endhigh); + + /* Error if the CTC doesn't behave itself. */ + if ( count == 0 ) + return 0; + + /* [endhigh:endlow] = [endhigh:endlow] - [starthigh:startlow] */ + __asm__( "subl %2,%0 ; sbbl %3,%1" + : "=a" (endlow), "=d" (endhigh) + : "g" (startlow), "g" (starthigh), "0" (endlow), "1" (endhigh) ); + + /* If quotient doesn't fit in 32 bits then we return error (zero). */ + return endhigh ? 0 : endlow; +} + + +/*************************************************************************** + * CMOS Timer functions + ***************************************************************************/ + +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] 
+ * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! (However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long +mktime (unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + /* 1..12 -> 11,12,1..10: put Feb last since it has a leap day. */ + if ( 0 >= (int) (mon -= 2) ) + { + mon += 12; + year -= 1; + } + + return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+ + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +static unsigned long __get_cmos_time(void) +{ + unsigned int year, mon, day, hour, min, sec; + + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + + if ( !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) + { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + + if ( (year += 1900) < 1970 ) + year += 100; + + return mktime(year, mon, day, hour, min, sec); +} + +static unsigned long get_cmos_time(void) +{ + unsigned long res, flags; + int i; + + spin_lock_irqsave(&rtc_lock, flags); + + /* read RTC exactly on falling edge of update flag */ + for ( i = 0 ; i < 1000000 ; i++ ) /* may take up to 1 second... 
*/ + if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) ) + break; + for ( i = 0 ; i < 1000000 ; i++ ) /* must try at least 2.228 ms */ + if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) ) + break; + + res = __get_cmos_time(); + + spin_unlock_irqrestore(&rtc_lock, flags); + return res; +} + +/*************************************************************************** + * System Time + ***************************************************************************/ + +static inline u64 get_time_delta(void) +{ + s32 delta_tsc; + u32 low; + u64 delta, tsc; + + rdtscll(tsc); + low = (u32)(tsc >> rdtsc_bitshift); + delta_tsc = (s32)(low - tsc_irq); + if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); + + return delta; +} + +s_time_t get_s_time(void) +{ + s_time_t now; + unsigned long flags; + + read_lock_irqsave(&xtime_lock, flags); + + now = stime_irq + get_time_delta(); + + /* Ensure that the returned system time is monotonically increasing. */ + { + static s_time_t prev_now = 0; + if ( unlikely(now < prev_now) ) + now = prev_now; + prev_now = now; + } + + read_unlock_irqrestore(&xtime_lock, flags); + + return now; +} + + +void update_dom_time(shared_info_t *si) +{ + unsigned long flags; + + read_lock_irqsave(&xtime_lock, flags); + + si->time_version1++; + wmb(); + + si->cpu_freq = cpu_freq; + si->tsc_timestamp.tsc_bitshift = rdtsc_bitshift; + si->tsc_timestamp.tsc_bits = tsc_irq; + si->system_time = stime_irq; + si->wc_sec = xtime.tv_sec; + si->wc_usec = xtime.tv_usec; + si->wc_usec += (jiffies - wall_jiffies) * (1000000 / HZ); + while ( si->wc_usec >= 1000000 ) + { + si->wc_usec -= 1000000; + si->wc_sec++; + } + + wmb(); + si->time_version2++; + + read_unlock_irqrestore(&xtime_lock, flags); +} + + +/* Set clock to after 00:00:00 UTC, 1 January, 1970. 
*/ +void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) +{ + s64 delta; + long _usecs = (long)usecs; + + write_lock_irq(&xtime_lock); + + delta = (s64)(stime_irq - system_time_base); + + _usecs += (long)(delta/1000); + _usecs -= (jiffies - wall_jiffies) * (1000000 / HZ); + + while ( _usecs < 0 ) + { + _usecs += 1000000; + secs--; + } + + xtime.tv_sec = secs; + xtime.tv_usec = _usecs; + + write_unlock_irq(&xtime_lock); + + update_dom_time(current->shared_info); +} + + +/* Late init function (after all CPUs are booted). */ +int __init init_xen_time() +{ + u64 scale; + u64 full_tsc; + unsigned int cpu_ghz; + + cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL); + for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 ) + continue; + + scale = 1000000000LL << (32 + rdtsc_bitshift); + scale /= cpu_freq; + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + + /* System time ticks from zero. */ + rdtscll(full_tsc); + stime_irq = (s_time_t)0; + tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); + + /* Wallclock time starts as the initial RTC time. */ + xtime.tv_sec = get_cmos_time(); + + printk("Time init:\n"); + printk(".... System Time: %lldns\n", + NOW()); + printk(".... cpu_freq: %08X:%08X\n", + (u32)(cpu_freq>>32), (u32)cpu_freq); + printk(".... scale: %08X:%08X\n", + (u32)(scale>>32), (u32)scale); + printk(".... Wall Clock: %lds %ldus\n", + xtime.tv_sec, xtime.tv_usec); + + return 0; +} + + +/* Early init function. 
*/ +void __init time_init(void) +{ + unsigned long ticks_per_frac = calibrate_tsc(); + + if ( !ticks_per_frac ) + panic("Error calibrating TSC\n"); + + ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC); + cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC); + + cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC; + + printk("Detected %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + setup_irq(0, &irq0); +} diff --git a/xen/arch/x86/trampoline.S b/xen/arch/x86/trampoline.S new file mode 100644 index 0000000000..d9a1cb6888 --- /dev/null +++ b/xen/arch/x86/trampoline.S @@ -0,0 +1,59 @@ +/* + * + * Trampoline.S Derived from Setup.S by Linus Torvalds + * + * 4 Jan 1997 Michael Chastain: changed to gnu as. + * + * Entry: CS:IP point to the start of our code, we are + * in real mode with no stack, but the rest of the + * trampoline page to make our stack and everything else + * is a mystery. + * + * On entry to trampoline_data, the processor is in real mode + * with 16-bit addressing and 16-bit data. CS has some value + * and IP is zero. Thus, data addresses need to be absolute + * (no relocation) and are taken with regard to r_base. + */ + +#include +#include +#include + +#ifdef CONFIG_SMP + +.data + +.code16 + +ENTRY(trampoline_data) +r_base = . 
+ mov %cs, %ax # Code and data in the same place + mov %ax, %ds + + movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline + cli # We should be safe anyway + + movl $0xA5A5A5A5, trampoline_data - r_base + + lidt idt_48 - r_base # load idt with 0, 0 + lgdt gdt_48 - r_base # load gdt with whatever is appropriate + + xor %ax, %ax + inc %ax # protected mode (PE) bit + lmsw %ax # into protected mode + jmp flush_instr +flush_instr: + ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET + +idt_48: + .word 0 # idt limit = 0 + .word 0, 0 # idt base = 0L + +gdt_48: + .word (LAST_RESERVED_GDT_ENTRY*8)+7 + .long gdt_table-__PAGE_OFFSET + +.globl SYMBOL_NAME(trampoline_end) +SYMBOL_NAME_LABEL(trampoline_end) + +#endif /* CONFIG_SMP */ diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c new file mode 100644 index 0000000000..329efc9d23 --- /dev/null +++ b/xen/arch/x86/traps.c @@ -0,0 +1,910 @@ +/****************************************************************************** + * arch/i386/traps.c + * + * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * xen/arch/i386/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GTBF_TRAP 1 +#define GTBF_TRAP_NOCODE 2 +#define GTBF_TRAP_CR2 4 +struct guest_trap_bounce { + unsigned long error_code; /* 0 */ + unsigned long cr2; /* 4 */ + unsigned short flags; /* 8 */ + unsigned short cs; /* 10 */ + unsigned long eip; /* 12 */ +} guest_trap_bounce[NR_CPUS] = { { 0 } }; + +#define DOUBLEFAULT_STACK_SIZE 1024 +static struct tss_struct doublefault_tss; +static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; + +asmlinkage int hypervisor_call(void); +asmlinkage void lcall7(void); +asmlinkage void lcall27(void); + +/* Master table, and the one used by CPU0. */ +struct desc_struct idt_table[256] = { {0, 0}, }; +/* All other CPUs have their own copy. 
*/ +struct desc_struct *idt_tables[NR_CPUS] = { 0 }; + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void nmi(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +int kstack_depth_to_print = 8*20; + +static inline int kernel_text_address(unsigned long addr) +{ + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + return 0; + +} + +void show_stack(unsigned long *esp) +{ + unsigned long *stack, addr; + int i; + + printk("Stack trace from ESP=%p:\n", esp); + + stack = esp; + for ( i = 0; i < kstack_depth_to_print; i++ ) + { + if ( ((long)stack & (STACK_SIZE-1)) == 0 ) + break; + if ( i && ((i % 8) == 0) ) + printk("\n "); + if ( kernel_text_address(*stack) ) + printk("[%08lx] ", *stack++); + else + printk("%08lx ", *stack++); + } + printk("\n"); + + printk("Call Trace from ESP=%p: ", esp); + stack = esp; + i = 0; + while (((long) stack & (STACK_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_registers(struct pt_regs *regs) +{ + unsigned long esp; + unsigned short ss; + + esp = (unsigned long) (®s->esp); + ss = __HYPERVISOR_DS; + if ( regs->xcs & 3 ) + { + esp = regs->esp; + ss = regs->xss & 0xffff; + } + + printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n", + 
smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, + regs->xfs & 0xffff, regs->xgs & 0xffff, ss); + + show_stack(®s->esp); +} + + +spinlock_t die_lock = SPIN_LOCK_UNLOCKED; + +void die(const char * str, struct pt_regs * regs, long err) +{ + unsigned long flags; + spin_lock_irqsave(&die_lock, flags); + printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff); + show_registers(regs); + spin_unlock_irqrestore(&die_lock, flags); + panic("HYPERVISOR DEATH!!\n"); +} + + +static inline void do_trap(int trapnr, char *str, + struct pt_regs *regs, + long error_code, int use_error_code) +{ + struct task_struct *p = current; + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + trap_info_t *ti; + unsigned long fixup; + + if (!(regs->xcs & 3)) + goto fault_in_hypervisor; + + ti = current->thread.traps + trapnr; + gtb->flags = use_error_code ? 
GTBF_TRAP : GTBF_TRAP_NOCODE; + gtb->error_code = error_code; + gtb->cs = ti->cs; + gtb->eip = ti->address; + if ( TI_GET_IF(ti) ) + p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + return; + + fault_in_hypervisor: + + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + DPRINTK("Trap %d: %08lx -> %08lx\n", trapnr, regs->eip, fixup); + regs->eip = fixup; + regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; + return; + } + + show_registers(regs); + panic("CPU%d FATAL TRAP: vector = %d (%s)\n" + "[error_code=%08x]\n", + smp_processor_id(), trapnr, str, error_code); +} + +#define DO_ERROR_NOCODE(trapnr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ +do_trap(trapnr, str, regs, error_code, 0); \ +} + +#define DO_ERROR(trapnr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ +do_trap(trapnr, str, regs, error_code, 1); \ +} + +DO_ERROR_NOCODE( 0, "divide error", divide_error) + DO_ERROR_NOCODE( 4, "overflow", overflow) + DO_ERROR_NOCODE( 5, "bounds", bounds) + DO_ERROR_NOCODE( 6, "invalid operand", invalid_op) + DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun) + DO_ERROR(10, "invalid TSS", invalid_TSS) + DO_ERROR(11, "segment not present", segment_not_present) + DO_ERROR(12, "stack segment", stack_segment) +/* Vector 15 reserved by Intel */ + DO_ERROR_NOCODE(16, "fpu error", coprocessor_error) + DO_ERROR(17, "alignment check", alignment_check) + DO_ERROR_NOCODE(18, "machine check", machine_check) + DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error) + + asmlinkage void do_int3(struct pt_regs *regs, long error_code) +{ + struct task_struct *p = current; + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + trap_info_t *ti; + +#ifdef XEN_DEBUGGER + if ( pdb_initialized && pdb_handle_exception(3, regs) == 0 ) + return; +#endif + + if ( (regs->xcs & 3) != 3 ) + { + if ( unlikely((regs->xcs & 3) == 0) ) + { 
+ show_registers(regs); + panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n" + "[error_code=%08x]\n", + smp_processor_id(), error_code); + } + } + + ti = current->thread.traps + 3; + gtb->flags = GTBF_TRAP_NOCODE; + gtb->error_code = error_code; + gtb->cs = ti->cs; + gtb->eip = ti->address; + if ( TI_GET_IF(ti) ) + p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; +} + +asmlinkage void do_double_fault(void) +{ + extern spinlock_t console_lock; + struct tss_struct *tss = &doublefault_tss; + unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1; + + /* Disable the NMI watchdog. It's useless now. */ + watchdog_on = 0; + + /* Find information saved during fault and dump it to the console. */ + tss = &init_tss[cpu]; + printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n", + cpu, tss->cs, tss->eip, tss->eflags); + printk("CR3: %08lx\n", tss->__cr3); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + tss->eax, tss->ebx, tss->ecx, tss->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + tss->esi, tss->edi, tss->ebp, tss->esp); + printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + tss->ds, tss->es, tss->fs, tss->gs, tss->ss); + printk("************************************\n"); + printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu); + printk("System needs manual reset.\n"); + printk("************************************\n"); + + /* Lock up the console to prevent spurious output from other CPUs. */ + spin_lock(&console_lock); + + /* Wait for manual reset. 
*/ + for ( ; ; ) ; +} + +asmlinkage void do_page_fault(struct pt_regs *regs, long error_code) +{ + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + trap_info_t *ti; + unsigned long off, addr, fixup; + struct task_struct *p = current; + extern int map_ldt_shadow_page(unsigned int); + + __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : ); + + perfc_incrc(page_faults); + + if ( unlikely(addr >= LDT_VIRT_START) && + (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) ) + { + /* + * Copy a mapping from the guest's LDT, if it is valid. Otherwise we + * send the fault up to the guest OS to be handled. + */ + off = addr - LDT_VIRT_START; + addr = p->mm.ldt_base + off; + if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) ) + return; /* successfully copied the mapping */ + } + + if ( unlikely(p->mm.shadow_mode) && + (addr < PAGE_OFFSET) && shadow_fault(addr, error_code) ) + return; /* Returns TRUE if fault was handled. */ + + if ( unlikely(!(regs->xcs & 3)) ) + goto fault_in_hypervisor; + + ti = p->thread.traps + 14; + gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */ + gtb->cr2 = addr; + gtb->error_code = error_code; + gtb->cs = ti->cs; + gtb->eip = ti->address; + if ( TI_GET_IF(ti) ) + p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + return; + + fault_in_hypervisor: + + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + perfc_incrc(copy_user_faults); + if ( !p->mm.shadow_mode ) + DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup); + regs->eip = fixup; + regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; + return; + } + + if ( addr >= PAGE_OFFSET ) + { + unsigned long page; + page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]); + printk("*pde = %08lx\n", page); + if ( page & _PAGE_PRESENT ) + { + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT]; + printk(" *pte = %08lx\n", page); + } +#ifdef MEMORY_GUARD + if ( !(error_code & 1) ) + 
printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n"); +#endif + } + +#ifdef XEN_DEBUGGER + if ( pdb_page_fault_possible ) + { + pdb_page_fault = 1; + /* make eax & edx valid to complete the instruction */ + regs->eax = (long)&pdb_page_fault_scratch; + regs->edx = (long)&pdb_page_fault_scratch; + return; + } +#endif + + show_registers(regs); + panic("CPU%d FATAL PAGE FAULT\n" + "[error_code=%08x]\n" + "Faulting linear address might be %08lx\n", + smp_processor_id(), error_code, addr); +} + +asmlinkage void do_general_protection(struct pt_regs *regs, long error_code) +{ + struct task_struct *p = current; + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + trap_info_t *ti; + unsigned long fixup; + + /* Badness if error in ring 0, or result of an interrupt. */ + if ( !(regs->xcs & 3) || (error_code & 1) ) + goto gp_in_kernel; + + /* + * Cunning trick to allow arbitrary "INT n" handling. + * + * We set DPL == 0 on all vectors in the IDT. This prevents any INT + * instruction from trapping to the appropriate vector, when that might not + * be expected by Xen or the guest OS. For example, that entry might be for + * a fault handler (unlike traps, faults don't increment EIP), or might + * expect an error code on the stack (which a software trap never + * provides), or might be a hardware interrupt handler that doesn't like + * being called spuriously. + * + * Instead, a GPF occurs with the faulting IDT vector in the error code. + * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is + * clear to indicate that it's a software fault, not hardware. + * + * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is + * okay because they can only be triggered by an explicit DPL-checked + * instruction. The DPL specified by the guest OS for these vectors is NOT + * CHECKED!! + */ + if ( (error_code & 3) == 2 ) + { + /* This fault must be due to instruction. 
*/ + ti = current->thread.traps + (error_code>>3); + if ( TI_GET_DPL(ti) >= (regs->xcs & 3) ) + { +#ifdef XEN_DEBUGGER + if ( pdb_initialized && (pdb_ctx.system_call != 0) ) + { + unsigned long cr3; + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + if ( cr3 == pdb_ctx.ptbr ) + pdb_linux_syscall_enter_bkpt(regs, error_code, ti); + } +#endif + + gtb->flags = GTBF_TRAP_NOCODE; + regs->eip += 2; + goto finish_propagation; + } + } + + /* Pass on GPF as is. */ + ti = current->thread.traps + 13; + gtb->flags = GTBF_TRAP; + gtb->error_code = error_code; + finish_propagation: + gtb->cs = ti->cs; + gtb->eip = ti->address; + if ( TI_GET_IF(ti) ) + p->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; + return; + + gp_in_kernel: + + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + DPRINTK("GPF (%04lx): %08lx -> %08lx\n", error_code, regs->eip, fixup); + regs->eip = fixup; + regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS; + return; + } + + die("general protection fault", regs, error_code); +} + +asmlinkage void mem_parity_error(unsigned char reason, struct pt_regs * regs) +{ + printk("NMI received. Dazed and confused, but trying to continue\n"); + printk("You probably have a hardware problem with your RAM chips\n"); + + /* Clear and disable the memory parity error line. */ + reason = (reason & 0xf) | 4; + outb(reason, 0x61); + + show_registers(regs); + panic("PARITY ERROR"); +} + +asmlinkage void io_check_error(unsigned char reason, struct pt_regs * regs) +{ + printk("NMI: IOCK error (debug interrupt?)\n"); + + reason = (reason & 0xf) | 8; + outb(reason, 0x61); + + show_registers(regs); + panic("IOCK ERROR"); +} + +static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +{ + printk("Uhhuh. 
NMI received for unknown reason %02x.\n", reason); + printk("Dazed and confused, but trying to continue\n"); + printk("Do you have a strange power saving mode enabled?\n"); +} + +asmlinkage void do_nmi(struct pt_regs * regs, unsigned long reason) +{ + ++nmi_count(smp_processor_id()); + +#if CONFIG_X86_LOCAL_APIC + if ( nmi_watchdog ) + nmi_watchdog_tick(regs); + else +#endif + unknown_nmi_error((unsigned char)(reason&0xff), regs); +} + +asmlinkage void math_state_restore(struct pt_regs *regs, long error_code) +{ + /* Prevent recursion. */ + clts(); + + if ( !test_bit(PF_USEDFPU, ¤t->flags) ) + { + if ( test_bit(PF_DONEFPUINIT, ¤t->flags) ) + restore_fpu(current); + else + init_fpu(); + set_bit(PF_USEDFPU, ¤t->flags); /* so we fnsave on switch_to() */ + } + + if ( test_and_clear_bit(PF_GUEST_STTS, ¤t->flags) ) + { + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + gtb->flags = GTBF_TRAP_NOCODE; + gtb->cs = current->thread.traps[7].cs; + gtb->eip = current->thread.traps[7].address; + } +} + +#ifdef XEN_DEBUGGER +asmlinkage void do_pdb_debug(struct pt_regs *regs, long error_code) +{ + unsigned int condition; + struct task_struct *tsk = current; + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + + __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + if ( (condition & (1 << 14)) != (1 << 14) ) + printk("\nwarning: debug trap w/o BS bit [0x%x]\n\n", condition); + __asm__("movl %0,%%db6" : : "r" (0)); + + if ( pdb_handle_exception(1, regs) != 0 ) + { + tsk->thread.debugreg[6] = condition; + + gtb->flags = GTBF_TRAP_NOCODE; + gtb->cs = tsk->thread.traps[1].cs; + gtb->eip = tsk->thread.traps[1].address; + } +} +#endif + +asmlinkage void do_debug(struct pt_regs *regs, long error_code) +{ + unsigned int condition; + struct task_struct *tsk = current; + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + +#ifdef XEN_DEBUGGER + if ( pdb_initialized ) + return do_pdb_debug(regs, error_code); +#endif + + 
__asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) && + (tsk->thread.debugreg[7] == 0) ) + { + __asm__("movl %0,%%db7" : : "r" (0)); + return; + } + + if ( (regs->xcs & 3) == 0 ) + { + /* Clear TF just for absolute sanity. */ + regs->eflags &= ~EF_TF; + /* + * Basically, we ignore watchpoints when they trigger in + * the hypervisor. This may happen when a buffer is passed + * to us which previously had a watchpoint set on it. + * No need to bump EIP; the only faulting trap is an + * instruction breakpoint, which can't happen to us. + */ + return; + } + + /* Save debug status register where guest OS can peek at it */ + tsk->thread.debugreg[6] = condition; + + gtb->flags = GTBF_TRAP_NOCODE; + gtb->cs = tsk->thread.traps[1].cs; + gtb->eip = tsk->thread.traps[1].address; +} + + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ /* nothing */ } + + +#define _set_gate(gate_addr,type,dpl,addr) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ + "movw %4,%%dx\n\t" \ + "movl %%eax,%0\n\t" \ + "movl %%edx,%1" \ + :"=m" (*((long *) (gate_addr))), \ + "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ + :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ + "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \ +} while (0) + +void set_intr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,0,addr); +} + +static void __init set_system_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,3,addr); +} + +static void set_task_gate(unsigned int n, unsigned int sel) +{ + idt_table[n].a = sel << 16; + idt_table[n].b = 0x8500; +} + +#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\ + *((gate_addr)+1) = ((base) & 0xff000000) | \ + (((base) & 0x00ff0000)>>16) | \ + ((limit) & 0xf0000) | \ + ((dpl)<<13) | \ + (0x00408000) | \ + ((type)<<8); \ + *(gate_addr) = 
(((base) & 0x0000ffff)<<16) | \ + ((limit) & 0x0ffff); } + +#define _set_tssldt_desc(n,addr,limit,type) \ +__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ + "movw %%ax,2(%2)\n\t" \ + "rorl $16,%%eax\n\t" \ + "movb %%al,4(%2)\n\t" \ + "movb %4,5(%2)\n\t" \ + "movb $0,6(%2)\n\t" \ + "movb %%ah,7(%2)\n\t" \ + "rorl $16,%%eax" \ + : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) + +void set_tss_desc(unsigned int n, void *addr) +{ + _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 8299, 0x89); +} + +void __init trap_init(void) +{ + /* + * Make a separate task for double faults. This will get us debug output if + * we blow the kernel stack. + */ + struct tss_struct *tss = &doublefault_tss; + memset(tss, 0, sizeof(*tss)); + tss->ds = __HYPERVISOR_DS; + tss->es = __HYPERVISOR_DS; + tss->ss = __HYPERVISOR_DS; + tss->esp = (unsigned long) + &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; + tss->__cr3 = __pa(idle_pg_table); + tss->cs = __HYPERVISOR_CS; + tss->eip = (unsigned long)do_double_fault; + tss->eflags = 2; + tss->bitmap = INVALID_IO_BITMAP_OFFSET; + _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, + (int)tss, 235, 0x89); + + /* + * Note that interrupt gates are always used, rather than trap gates. We + * must have interrupts disabled until DS/ES/FS/GS are saved because the + * first activation must have the "bad" value(s) for these registers and + * we may lose them if another activation is installed before they are + * saved. The page-fault handler also needs interrupts disabled until %cr2 + * has been read and saved on the stack. 
+ */ + set_intr_gate(0,÷_error); + set_intr_gate(1,&debug); + set_intr_gate(2,&nmi); + set_system_gate(3,&int3); /* usable from all privilege levels */ + set_system_gate(4,&overflow); /* usable from all privilege levels */ + set_intr_gate(5,&bounds); + set_intr_gate(6,&invalid_op); + set_intr_gate(7,&device_not_available); + set_task_gate(8,__DOUBLEFAULT_TSS_ENTRY<<3); + set_intr_gate(9,&coprocessor_segment_overrun); + set_intr_gate(10,&invalid_TSS); + set_intr_gate(11,&segment_not_present); + set_intr_gate(12,&stack_segment); + set_intr_gate(13,&general_protection); + set_intr_gate(14,&page_fault); + set_intr_gate(15,&spurious_interrupt_bug); + set_intr_gate(16,&coprocessor_error); + set_intr_gate(17,&alignment_check); + set_intr_gate(18,&machine_check); + set_intr_gate(19,&simd_coprocessor_error); + + /* Only ring 1 can access monitor services. */ + _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,14,1,&hypervisor_call); + + /* CPU0 uses the master IDT. */ + idt_tables[0] = idt_table; + + /* + * Should be a barrier for any external CPU state. 
+ */ + { + extern void cpu_init(void); + cpu_init(); + } +} + + +long do_set_trap_table(trap_info_t *traps) +{ + trap_info_t cur; + trap_info_t *dst = current->thread.traps; + + for ( ; ; ) + { + if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT; + + if ( cur.address == 0 ) break; + + if ( !VALID_CODESEL(cur.cs) ) return -EPERM; + + memcpy(dst+cur.vector, &cur, sizeof(cur)); + traps++; + } + + return 0; +} + + +long do_set_callbacks(unsigned long event_selector, + unsigned long event_address, + unsigned long failsafe_selector, + unsigned long failsafe_address) +{ + struct task_struct *p = current; + + if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) ) + return -EPERM; + + p->event_selector = event_selector; + p->event_address = event_address; + p->failsafe_selector = failsafe_selector; + p->failsafe_address = failsafe_address; + + return 0; +} + + +long set_fast_trap(struct task_struct *p, int idx) +{ + trap_info_t *ti; + + /* Index 0 is special: it disables fast traps. */ + if ( idx == 0 ) + { + if ( p == current ) + CLEAR_FAST_TRAP(&p->thread); + SET_DEFAULT_FAST_TRAP(&p->thread); + return 0; + } + + /* + * We only fast-trap vectors 0x20-0x2f, and vector 0x80. + * The former range is used by Windows and MS-DOS. + * Vector 0x80 is used by Linux and the BSD variants. + */ + if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) ) + return -1; + + ti = p->thread.traps + idx; + + /* + * We can't virtualise interrupt gates, as there's no way to get + * the CPU to automatically clear the events_mask variable. 
+ */ + if ( TI_GET_IF(ti) ) + return -1; + + if ( p == current ) + CLEAR_FAST_TRAP(&p->thread); + + p->thread.fast_trap_idx = idx; + p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff); + p->thread.fast_trap_desc.b = + (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13; + + if ( p == current ) + SET_FAST_TRAP(&p->thread); + + return 0; +} + + +long do_set_fast_trap(int idx) +{ + return set_fast_trap(current, idx); +} + + +long do_fpu_taskswitch(void) +{ + set_bit(PF_GUEST_STTS, ¤t->flags); + stts(); + return 0; +} + + +long set_debugreg(struct task_struct *p, int reg, unsigned long value) +{ + int i; + + switch ( reg ) + { + case 0: + if ( value > (PAGE_OFFSET-4) ) return -EPERM; + if ( p == current ) + __asm__ ( "movl %0, %%db0" : : "r" (value) ); + break; + case 1: + if ( value > (PAGE_OFFSET-4) ) return -EPERM; + if ( p == current ) + __asm__ ( "movl %0, %%db1" : : "r" (value) ); + break; + case 2: + if ( value > (PAGE_OFFSET-4) ) return -EPERM; + if ( p == current ) + __asm__ ( "movl %0, %%db2" : : "r" (value) ); + break; + case 3: + if ( value > (PAGE_OFFSET-4) ) return -EPERM; + if ( p == current ) + __asm__ ( "movl %0, %%db3" : : "r" (value) ); + break; + case 6: + /* + * DR6: Bits 4-11,16-31 reserved (set to 1). + * Bit 12 reserved (set to 0). + */ + value &= 0xffffefff; /* reserved bits => 0 */ + value |= 0xffff0ff0; /* reserved bits => 1 */ + if ( p == current ) + __asm__ ( "movl %0, %%db6" : : "r" (value) ); + break; + case 7: + /* + * DR7: Bit 10 reserved (set to 1). + * Bits 11-12,14-15 reserved (set to 0). + * Privileged bits: + * GD (bit 13): must be 0. + * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10. + * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10. + */ + /* DR7 == 0 => debugging disabled for this domain. 
*/ + if ( value != 0 ) + { + value &= 0xffff27ff; /* reserved bits => 0 */ + value |= 0x00000400; /* reserved bits => 1 */ + if ( (value & (1<<13)) != 0 ) return -EPERM; + for ( i = 0; i < 16; i += 2 ) + if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM; + } + if ( p == current ) + __asm__ ( "movl %0, %%db7" : : "r" (value) ); + break; + default: + return -EINVAL; + } + + p->thread.debugreg[reg] = value; + return 0; +} + +long do_set_debugreg(int reg, unsigned long value) +{ + return set_debugreg(current, reg, value); +} + +unsigned long do_get_debugreg(int reg) +{ + if ( (reg < 0) || (reg > 7) ) return -EINVAL; + return current->thread.debugreg[reg]; +} diff --git a/xen/arch/x86/usercopy.c b/xen/arch/x86/usercopy.c new file mode 100644 index 0000000000..dc2d34cb90 --- /dev/null +++ b/xen/arch/x86/usercopy.c @@ -0,0 +1,190 @@ +/* + * User address space access functions. + * The non inlined parts of asm-i386/uaccess.h are here. + * + * Copyright 1997 Andi Kleen + * Copyright 1997 Linus Torvalds + */ +#include +#include +//#include + +#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + { + if(n<512) + __copy_user(to,from,n); + else + mmx_copy_user(to,from,n); + } + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + { + if(n<512) + __copy_user_zeroing(to,from,n); + else + mmx_copy_user_zeroing(to, from, n); + } + else + memset(to, 0, n); + return n; +} + +#else + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __copy_user(to,from,n); + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + prefetchw(to); + if (access_ok(VERIFY_READ, from, n)) + __copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} 
+ +#endif + +/* + * Copy a null terminated string from userspace. + */ + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + +long +__strncpy_from_user(char *dst, const char *src, long count) +{ + long res; + __do_strncpy_from_user(dst, src, count, res); + return res; +} + +long +strncpy_from_user(char *dst, const char *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +} + + +/* + * Zero Userspace + */ + +#define __do_clear_user(addr,size) \ +do { \ + int __d0; \ + __asm__ __volatile__( \ + "0: rep; stosl\n" \ + " movl %2,%0\n" \ + "1: rep; stosb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%2,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0) \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +} while (0) + +unsigned long +clear_user(void *to, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +} + +unsigned long +__clear_user(void *to, unsigned long n) +{ + __do_clear_user(to, n); + return n; +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ + +long strnlen_user(const char *s, long n) +{ + unsigned long mask = 
-__addr_ok(s); + unsigned long res, tmp; + + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,2b\n" + ".previous" + :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"0" (n), "1" (s), "2" (0), "3" (mask) + :"cc"); + return res & mask; +} diff --git a/xen/arch/x86/xen.lds b/xen/arch/x86/xen.lds new file mode 100644 index 0000000000..5947ebada5 --- /dev/null +++ b/xen/arch/x86/xen.lds @@ -0,0 +1,87 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(start) +SECTIONS +{ + . = 0xFC400000 + 0x100000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + .text.lock : { *(.text.lock) } /* out-of-line lock text */ + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . 
= ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff --git a/xen/arch/x86_64/Rules.mk b/xen/arch/x86_64/Rules.mk deleted file mode 100644 index ceea6a2479..0000000000 --- a/xen/arch/x86_64/Rules.mk +++ /dev/null @@ -1,18 +0,0 @@ -######################################## -# x86-specific definitions - -CC := gcc -LD := ld -# Linker should relocate monitor to this address -MONITOR_BASE := 0xFC500000 -# Bootloader should load monitor to this real address -LOAD_BASE := 0x00100000 -CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -CFLAGS += -iwithprefix include -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE) -CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -DNDEBUG -#CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__ -CFLAGS += -Wno-pointer-arith -Wredundant-decls -m64 -LDARCHFLAGS := -LDFLAGS := -T xen.lds -N - - diff --git a/xen/drivers/char/keyboard.c b/xen/drivers/char/keyboard.c index 960bc5f773..e736aa737f 100644 --- a/xen/drivers/char/keyboard.c +++ b/xen/drivers/char/keyboard.c @@ -6,8 +6,8 @@ * This file contains portions of code from Linux. 
*/ -#include -#include +#include +#include #include #include #include diff --git a/xen/drivers/char/serial.c b/xen/drivers/char/serial.c index 04c41cc28e..8e6cd462b8 100644 --- a/xen/drivers/char/serial.c +++ b/xen/drivers/char/serial.c @@ -8,7 +8,7 @@ * Copyright (c) 2003-2004, K A Fraser */ -#include +#include #include #include #include diff --git a/xen/drivers/pci/pci.c b/xen/drivers/pci/pci.c index 846e5f01dc..4111f2d275 100644 --- a/xen/drivers/pci/pci.c +++ b/xen/drivers/pci/pci.c @@ -930,27 +930,10 @@ pci_clear_mwi(struct pci_dev *dev) } } -int -pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - if (!pci_dma_supported(dev, mask)) - return -EIO; - - dev->dma_mask = mask; - - return 0; -} - -int -pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - if (!pci_dac_dma_supported(dev, mask)) - return -EIO; - - dev->dma_mask = mask; - - return 0; -} +#if 0 /* NOT IN XEN */ +int pci_set_dma_mask(struct pci_dev *dev, u64 mask) +int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) +#endif /* * Translate the low bits of the PCI base diff --git a/xen/include/asm-i386/acpi.h b/xen/include/asm-i386/acpi.h deleted file mode 100644 index 4d750d486f..0000000000 --- a/xen/include/asm-i386/acpi.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * asm-i386/acpi.h - * - * Copyright (C) 2001 Paul Diefenbaugh - * Copyright (C) 2001 Patrick Mochel - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#ifndef _ASM_ACPI_H -#define _ASM_ACPI_H - -#ifdef __KERNEL__ - -#define COMPILER_DEPENDENT_INT64 long long -#define COMPILER_DEPENDENT_UINT64 unsigned long long - -/* - * Calling conventions: - * - * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) - * ACPI_EXTERNAL_XFACE - External ACPI interfaces - * ACPI_INTERNAL_XFACE - Internal ACPI interfaces - * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces - */ -#define ACPI_SYSTEM_XFACE -#define ACPI_EXTERNAL_XFACE -#define ACPI_INTERNAL_XFACE -#define ACPI_INTERNAL_VAR_XFACE - -/* Asm macros */ - -#define ACPI_ASM_MACROS -#define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() __cli() -#define ACPI_ENABLE_IRQS() __sti() -#define ACPI_FLUSH_CPU_CACHE() wbinvd() - -/* - * A brief explanation as GNU inline assembly is a bit hairy - * %0 is the output parameter in EAX ("=a") - * %1 and %2 are the input parameters in ECX ("c") - * and an immediate value ("i") respectively - * All actual register references are preceded with "%%" as in "%%edx" - * Immediate values in the assembly are preceded by "$" as in "$0x1" - * The final asm parameter are the operation altered non-output registers. 
- */ -#define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) \ - do { \ - int dummy; \ - asm("1: movl (%1),%%eax;" \ - "movl %%eax,%%edx;" \ - "andl %2,%%edx;" \ - "btsl $0x1,%%edx;" \ - "adcl $0x0,%%edx;" \ - "lock; cmpxchgl %%edx,(%1);" \ - "jnz 1b;" \ - "cmpb $0x3,%%dl;" \ - "sbbl %%eax,%%eax" \ - :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~1L):"dx"); \ - } while(0) - -#define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) \ - do { \ - int dummy; \ - asm("1: movl (%1),%%eax;" \ - "movl %%eax,%%edx;" \ - "andl %2,%%edx;" \ - "lock; cmpxchgl %%edx,(%1);" \ - "jnz 1b;" \ - "andl $0x1,%%eax" \ - :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~3L):"dx"); \ - } while(0) - - -/* - * Math helper asm macros - */ -#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ - asm("divl %2;" \ - :"=a"(q32), "=d"(r32) \ - :"r"(d32), \ - "0"(n_lo), "1"(n_hi)) - - -#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ - asm("shrl $1,%2;" \ - "rcrl $1,%3;" \ - :"=r"(n_hi), "=r"(n_lo) \ - :"0"(n_hi), "1"(n_lo)) - - -#ifdef CONFIG_ACPI_BOOT -extern int acpi_lapic; -extern int acpi_ioapic; -extern int acpi_noirq; - -/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ -#define FIX_ACPI_PAGES 4 - -#else /* !CONFIG_ACPI_BOOT */ -# define acpi_lapic 0 -# define acpi_ioapic 0 - -#endif /* !CONFIG_ACPI_BOOT */ - -#ifdef CONFIG_ACPI_PCI -static inline void acpi_noirq_set(void) { acpi_noirq = 1; } -extern int acpi_irq_balance_set(char *str); -#else -static inline void acpi_noirq_set(void) { } -static inline int acpi_irq_balance_set(char *str) { return 0; } -#endif - -#ifdef CONFIG_ACPI_SLEEP - -extern unsigned long saved_eip; -extern unsigned long saved_esp; -extern unsigned long saved_ebp; -extern unsigned long saved_ebx; -extern unsigned long saved_esi; -extern unsigned long saved_edi; - -static inline void acpi_save_register_state(unsigned long return_point) -{ - saved_eip = return_point; - asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp)); - asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp)); - asm volatile 
("movl %%ebx,(%0)" : "=m" (saved_ebx)); - asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi)); - asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi)); -} - -#define acpi_restore_register_state() do {} while (0) - - -/* routines for saving/restoring kernel state */ -extern int acpi_save_state_mem(void); -extern int acpi_save_state_disk(void); -extern void acpi_restore_state_mem(void); - -extern unsigned long acpi_wakeup_address; - -extern void do_suspend_lowlevel_s4bios(int resume); - -/* early initialization routine */ -extern void acpi_reserve_bootmem(void); - -#endif /*CONFIG_ACPI_SLEEP*/ - - -#endif /*__KERNEL__*/ - -#endif /*_ASM_ACPI_H*/ diff --git a/xen/include/asm-i386/apic.h b/xen/include/asm-i386/apic.h deleted file mode 100644 index f97e0b32d8..0000000000 --- a/xen/include/asm-i386/apic.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef __ASM_APIC_H -#define __ASM_APIC_H - -#include -#include -#include -#include - -#ifdef CONFIG_X86_LOCAL_APIC - -#define APIC_DEBUG 0 - -#if APIC_DEBUG -#define Dprintk(x...) printk(x) -#else -#define Dprintk(x...) -#endif - -/* - * Basic functions accessing APICs. 
- */ - -static __inline void apic_write(unsigned long reg, unsigned long v) -{ - *((volatile unsigned long *)(APIC_BASE+reg)) = v; -} - -static __inline void apic_write_atomic(unsigned long reg, unsigned long v) -{ - xchg((volatile unsigned long *)(APIC_BASE+reg), v); -} - -static __inline unsigned long apic_read(unsigned long reg) -{ - return *((volatile unsigned long *)(APIC_BASE+reg)); -} - -static __inline__ void apic_wait_icr_idle(void) -{ - do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); -} - -#ifdef CONFIG_X86_GOOD_APIC -# define FORCE_READ_AROUND_WRITE 0 -# define apic_read_around(x) -# define apic_write_around(x,y) apic_write((x),(y)) -#else -# define FORCE_READ_AROUND_WRITE 1 -# define apic_read_around(x) apic_read(x) -# define apic_write_around(x,y) apic_write_atomic((x),(y)) -#endif - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction: - * - a single rmw on Pentium/82489DX - * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) - * ... yummie. 
- */ - - /* Docs say use 0 for future compatibility */ - apic_write_around(APIC_EOI, 0); -} - -extern int get_maxlvt(void); -extern void clear_local_APIC(void); -extern void connect_bsp_APIC (void); -extern void disconnect_bsp_APIC (void); -extern void disable_local_APIC (void); -extern int verify_local_APIC (void); -extern void cache_APIC_registers (void); -extern void sync_Arb_IDs (void); -extern void init_bsp_APIC (void); -extern void setup_local_APIC (void); -extern void init_apic_mappings (void); -extern void smp_local_timer_interrupt (struct pt_regs * regs); -extern void setup_APIC_clocks (void); -extern void setup_apic_nmi_watchdog (void); -extern inline void nmi_watchdog_tick (struct pt_regs * regs); -extern int APIC_init_uniprocessor (void); -extern void disable_APIC_timer(void); -extern void enable_APIC_timer(void); - -/*extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);*/ -/*extern void apic_pm_unregister(struct pm_dev*);*/ - -extern unsigned int watchdog_on; - -extern unsigned int apic_timer_irqs [NR_CPUS]; -extern int check_nmi_watchdog (void); - -extern unsigned int nmi_watchdog; -#define NMI_NONE 0 -#define NMI_IO_APIC 1 -#define NMI_LOCAL_APIC 2 -#define NMI_INVALID 3 - -#endif /* CONFIG_X86_LOCAL_APIC */ - -#endif /* __ASM_APIC_H */ diff --git a/xen/include/asm-i386/apicdef.h b/xen/include/asm-i386/apicdef.h deleted file mode 100644 index 9f07409b3f..0000000000 --- a/xen/include/asm-i386/apicdef.h +++ /dev/null @@ -1,379 +0,0 @@ -#ifndef __ASM_APICDEF_H -#define __ASM_APICDEF_H - -/* - * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) - * - * Alan Cox , 1995. 
- * Ingo Molnar , 1999, 2000 - */ - -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 - -#define APIC_ID 0x20 -#define APIC_ID_MASK (0x0F<<24) -#define GET_APIC_ID(x) (((x)>>24)&0x0F) -#define APIC_LVR 0x30 -#define APIC_LVR_MASK 0xFF00FF -#define GET_APIC_VERSION(x) ((x)&0xFF) -#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) -#define APIC_INTEGRATED(x) ((x)&0xF0) -#define APIC_XAPIC_SUPPORT(x) ((x)>=0x14) -#define APIC_TASKPRI 0x80 -#define APIC_TPRI_MASK 0xFF -#define APIC_ARBPRI 0x90 -#define APIC_ARBPRI_MASK 0xFF -#define APIC_PROCPRI 0xA0 -#define APIC_EOI 0xB0 -#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ -#define APIC_RRR 0xC0 -#define APIC_LDR 0xD0 -#define APIC_LDR_MASK (0xFF<<24) -#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) -#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) -#define APIC_ALL_CPUS 0xFF -#define APIC_DFR 0xE0 -#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */ -#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */ -#define APIC_SPIV 0xF0 -#define APIC_SPIV_FOCUS_DISABLED (1<<9) -#define APIC_SPIV_APIC_ENABLED (1<<8) -#define APIC_ISR 0x100 -#define APIC_TMR 0x180 -#define APIC_IRR 0x200 -#define APIC_ESR 0x280 -#define APIC_ESR_SEND_CS 0x00001 -#define APIC_ESR_RECV_CS 0x00002 -#define APIC_ESR_SEND_ACC 0x00004 -#define APIC_ESR_RECV_ACC 0x00008 -#define APIC_ESR_SENDILL 0x00020 -#define APIC_ESR_RECVILL 0x00040 -#define APIC_ESR_ILLREGA 0x00080 -#define APIC_ICR 0x300 -#define APIC_DEST_SELF 0x40000 -#define APIC_DEST_ALLINC 0x80000 -#define APIC_DEST_ALLBUT 0xC0000 -#define APIC_ICR_RR_MASK 0x30000 -#define APIC_ICR_RR_INVALID 0x00000 -#define APIC_ICR_RR_INPROG 0x10000 -#define APIC_ICR_RR_VALID 0x20000 -#define APIC_INT_LEVELTRIG 0x08000 -#define APIC_INT_ASSERT 0x04000 -#define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_PHYSICAL 0x00000 -#define APIC_DEST_LOGICAL 0x00800 -#define APIC_DM_FIXED 0x00000 -#define APIC_DM_LOWEST 0x00100 -#define APIC_DM_SMI 0x00200 -#define APIC_DM_REMRD 0x00300 -#define APIC_DM_NMI 0x00400 -#define 
APIC_DM_INIT 0x00500 -#define APIC_DM_STARTUP 0x00600 -#define APIC_DM_EXTINT 0x00700 -#define APIC_VECTOR_MASK 0x000FF -#define APIC_ICR2 0x310 -#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) -#define SET_APIC_DEST_FIELD(x) ((x)<<24) -#define APIC_LVTT 0x320 -#define APIC_LVTPC 0x340 -#define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) -#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) -#define SET_APIC_TIMER_BASE(x) (((x)<<18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 -#define APIC_LVT_TIMER_PERIODIC (1<<17) -#define APIC_LVT_MASKED (1<<16) -#define APIC_LVT_LEVEL_TRIGGER (1<<15) -#define APIC_LVT_REMOTE_IRR (1<<14) -#define APIC_INPUT_POLARITY (1<<13) -#define APIC_SEND_PENDING (1<<12) -#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) -#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) -#define APIC_MODE_FIXED 0x0 -#define APIC_MODE_NMI 0x4 -#define APIC_MODE_EXINT 0x7 -#define APIC_LVT1 0x360 -#define APIC_LVTERR 0x370 -#define APIC_TMICT 0x380 -#define APIC_TMCCT 0x390 -#define APIC_TDCR 0x3E0 -#define APIC_TDR_DIV_TMBASE (1<<2) -#define APIC_TDR_DIV_1 0xB -#define APIC_TDR_DIV_2 0x0 -#define APIC_TDR_DIV_4 0x1 -#define APIC_TDR_DIV_8 0x2 -#define APIC_TDR_DIV_16 0x3 -#define APIC_TDR_DIV_32 0x8 -#define APIC_TDR_DIV_64 0x9 -#define APIC_TDR_DIV_128 0xA - -#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) - -#ifdef CONFIG_X86_CLUSTERED_APIC -#define MAX_IO_APICS 32 -#else -#define MAX_IO_APICS 8 -#endif - - -/* - * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs - * don't broadcast (yet?), but if they did, they might use 0xFFFF. - */ -#define APIC_BROADCAST_ID_XAPIC (0xFF) -#define APIC_BROADCAST_ID_APIC (0x0F) - -/* - * the local APIC register structure, memory mapped. 
Not terribly well - * tested, but we might eventually use this one in the future - the - * problem why we cannot use it right now is the P5 APIC, it has an - * errata which cannot take 8-bit reads and writes, only 32-bit ones ... - */ -#define u32 unsigned int - -#define lapic ((volatile struct local_apic *)APIC_BASE) - -struct local_apic { - -/*000*/ struct { u32 __reserved[4]; } __reserved_01; - -/*010*/ struct { u32 __reserved[4]; } __reserved_02; - -/*020*/ struct { /* APIC ID Register */ - u32 __reserved_1 : 24, - phys_apic_id : 4, - __reserved_2 : 4; - u32 __reserved[3]; - } id; - -/*030*/ const - struct { /* APIC Version Register */ - u32 version : 8, - __reserved_1 : 8, - max_lvt : 8, - __reserved_2 : 8; - u32 __reserved[3]; - } version; - -/*040*/ struct { u32 __reserved[4]; } __reserved_03; - -/*050*/ struct { u32 __reserved[4]; } __reserved_04; - -/*060*/ struct { u32 __reserved[4]; } __reserved_05; - -/*070*/ struct { u32 __reserved[4]; } __reserved_06; - -/*080*/ struct { /* Task Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } tpr; - -/*090*/ const - struct { /* Arbitration Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } apr; - -/*0A0*/ const - struct { /* Processor Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } ppr; - -/*0B0*/ struct { /* End Of Interrupt Register */ - u32 eoi; - u32 __reserved[3]; - } eoi; - -/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; - -/*0D0*/ struct { /* Logical Destination Register */ - u32 __reserved_1 : 24, - logical_dest : 8; - u32 __reserved_2[3]; - } ldr; - -/*0E0*/ struct { /* Destination Format Register */ - u32 __reserved_1 : 28, - model : 4; - u32 __reserved_2[3]; - } dfr; - -/*0F0*/ struct { /* Spurious Interrupt Vector Register */ - u32 spurious_vector : 8, - apic_enabled : 1, - focus_cpu : 1, - __reserved_2 : 22; - u32 __reserved_3[3]; - } svr; - -/*100*/ struct { /* In Service Register 
*/ -/*170*/ u32 bitfield; - u32 __reserved[3]; - } isr [8]; - -/*180*/ struct { /* Trigger Mode Register */ -/*1F0*/ u32 bitfield; - u32 __reserved[3]; - } tmr [8]; - -/*200*/ struct { /* Interrupt Request Register */ -/*270*/ u32 bitfield; - u32 __reserved[3]; - } irr [8]; - -/*280*/ union { /* Error Status Register */ - struct { - u32 send_cs_error : 1, - receive_cs_error : 1, - send_accept_error : 1, - receive_accept_error : 1, - __reserved_1 : 1, - send_illegal_vector : 1, - receive_illegal_vector : 1, - illegal_register_address : 1, - __reserved_2 : 24; - u32 __reserved_3[3]; - } error_bits; - struct { - u32 errors; - u32 __reserved_3[3]; - } all_errors; - } esr; - -/*290*/ struct { u32 __reserved[4]; } __reserved_08; - -/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; - -/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; - -/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; - -/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; - -/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; - -/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; - -/*300*/ struct { /* Interrupt Command Register 1 */ - u32 vector : 8, - delivery_mode : 3, - destination_mode : 1, - delivery_status : 1, - __reserved_1 : 1, - level : 1, - trigger : 1, - __reserved_2 : 2, - shorthand : 2, - __reserved_3 : 12; - u32 __reserved_4[3]; - } icr1; - -/*310*/ struct { /* Interrupt Command Register 2 */ - union { - u32 __reserved_1 : 24, - phys_dest : 4, - __reserved_2 : 4; - u32 __reserved_3 : 24, - logical_dest : 8; - } dest; - u32 __reserved_4[3]; - } icr2; - -/*320*/ struct { /* LVT - Timer */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - timer_mode : 1, - __reserved_3 : 14; - u32 __reserved_4[3]; - } lvt_timer; - -/*330*/ struct { u32 __reserved[4]; } __reserved_15; - -/*340*/ struct { /* LVT - Performance Counter */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - 
__reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_pc; - -/*350*/ struct { /* LVT - LINT0 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint0; - -/*360*/ struct { /* LVT - LINT1 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint1; - -/*370*/ struct { /* LVT - Error */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_error; - -/*380*/ struct { /* Timer Initial Count Register */ - u32 initial_count; - u32 __reserved_2[3]; - } timer_icr; - -/*390*/ const - struct { /* Timer Current Count Register */ - u32 curr_count; - u32 __reserved_2[3]; - } timer_ccr; - -/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; - -/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; - -/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; - -/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; - -/*3E0*/ struct { /* Timer Divide Configuration Register */ - u32 divisor : 4, - __reserved_1 : 28; - u32 __reserved_2[3]; - } timer_dcr; - -/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; - -} __attribute__ ((packed)); - -#undef u32 - -#endif diff --git a/xen/include/asm-i386/atomic.h b/xen/include/asm-i386/atomic.h deleted file mode 100644 index c9f2e32763..0000000000 --- a/xen/include/asm-i386/atomic.h +++ /dev/null @@ -1,195 +0,0 @@ -#ifndef __ARCH_I386_ATOMIC__ -#define __ARCH_I386_ATOMIC__ - -#include - -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - -#ifdef CONFIG_SMP -#define LOCK "lock ; " -#else -#define LOCK "" -#endif - -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. 
We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } - -/** - * atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -#define atomic_read(v) ((v)->counter) - -/** - * atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -#define atomic_set(v,i) (((v)->counter) = (i)) - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. Note that the guaranteed useful range - * of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_add(int i, atomic_t *v) -{ - __asm__ __volatile__( - LOCK "addl %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); -} - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_sub(int i, atomic_t *v) -{ - __asm__ __volatile__( - LOCK "subl %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); -} - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. 
- */ -static __inline__ int atomic_sub_and_test(int i, atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "subl %2,%0; sete %1" - :"=m" (v->counter), "=qm" (c) - :"ir" (i), "m" (v->counter) : "memory"); - return c; -} - -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_inc(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "incl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - -/** - * atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_dec(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "decl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ int atomic_dec_and_test(atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "decl %0; sete %1" - :"=m" (v->counter), "=qm" (c) - :"m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. 
- */ -static __inline__ int atomic_inc_and_test(atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "incl %0; sete %1" - :"=m" (v->counter), "=qm" (c) - :"m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic_add_negative - add and test if negative - * @v: pointer of type atomic_t - * @i: integer value to add - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ int atomic_add_negative(int i, atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "addl %2,%0; sets %1" - :"=m" (v->counter), "=qm" (c) - :"ir" (i), "m" (v->counter) : "memory"); - return c; -} - -/* Atomic operations are already serializing on x86 */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() - -#endif diff --git a/xen/include/asm-i386/bitops.h b/xen/include/asm-i386/bitops.h deleted file mode 100644 index e98f6b356f..0000000000 --- a/xen/include/asm-i386/bitops.h +++ /dev/null @@ -1,368 +0,0 @@ -#ifndef _I386_BITOPS_H -#define _I386_BITOPS_H - -/* - * Copyright 1992, Linus Torvalds. - */ - -#include - -/* - * These have to be done with inline assembly: that way the bit-setting - * is guaranteed to be atomic. All bit operations return 0 if the bit - * was cleared before the operation and != 0 if it was not. - * - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). - */ - -#ifdef CONFIG_SMP -#define LOCK_PREFIX "lock ; " -#else -#define LOCK_PREFIX "" -#endif - -#define ADDR (*(volatile long *) addr) - -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. 
See __set_bit() - * if you do not require the atomic guarantees. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __inline__ void set_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btsl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __inline__ void __set_bit(int nr, volatile void * addr) -{ - __asm__( - "btsl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() - * in order to ensure changes are visible on other processors. - */ -static __inline__ void clear_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btrl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} -#define smp_mb__before_clear_bit() barrier() -#define smp_mb__after_clear_bit() barrier() - -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. 
- */ -static __inline__ void __change_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__( - "btcl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __inline__ void change_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btcl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __inline__ int test_and_set_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __inline__ int __test_and_set_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__( - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr)); - return oldbit; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. 
- */ -static __inline__ int test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __inline__ int __test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__( - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr)); - return oldbit; -} - -/* WARNING: non atomic and it can be reordered! */ -static __inline__ int __test_and_change_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -/** - * test_and_change_bit - Change a bit and return its new value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __inline__ int test_and_change_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - - -static __inline__ int constant_test_bit(int nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int variable_test_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( - "btl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit) - :"m" (ADDR),"Ir" (nr)); - return oldbit; -} - -#define test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? 
\ - constant_test_bit((nr),(addr)) : \ - variable_test_bit((nr),(addr))) - -/** - * find_first_zero_bit - find the first zero bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. - */ -static __inline__ int find_first_zero_bit(void * addr, unsigned size) -{ - int d0, d1, d2; - int res; - - if (!size) - return 0; - /* This looks at memory. Mark it volatile to tell gcc not to move it around */ - __asm__ __volatile__( - "movl $-1,%%eax\n\t" - "xorl %%edx,%%edx\n\t" - "repe; scasl\n\t" - "je 1f\n\t" - "xorl -4(%%edi),%%eax\n\t" - "subl $4,%%edi\n\t" - "bsfl %%eax,%%edx\n" - "1:\tsubl %%ebx,%%edi\n\t" - "shll $3,%%edi\n\t" - "addl %%edi,%%edx" - :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - :"1" ((size + 31) >> 5), "2" (addr), "b" (addr)); - return res; -} - -/** - * find_next_zero_bit - find the first zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -static __inline__ int find_next_zero_bit (void * addr, int size, int offset) -{ - unsigned long * p = ((unsigned long *) addr) + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for zero in first byte - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (~(*p >> bit))); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No zero yet, search remaining full bytes for a zero - */ - res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); - return (offset + set + res); -} - -/** - * ffz - find first zero in word. - * @word: The word to search - * - * Undefined if no zero exists, so code should check against ~0UL first. 
- */ -static __inline__ unsigned long ffz(unsigned long word) -{ - __asm__("bsfl %1,%0" - :"=r" (word) - :"r" (~word)); - return word; -} - -/** - * ffs - find first bit set - * @x: the word to search - * - * This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ -static __inline__ int ffs(int x) -{ - int r; - - __asm__("bsfl %1,%0\n\t" - "jnz 1f\n\t" - "movl $-1,%0\n" - "1:" : "=r" (r) : "g" (x)); - return r+1; -} - -/** - * hweightN - returns the hamming weight of a N-bit word - * @x: the word to weigh - * - * The Hamming Weight of a number is the total number of bits set in it. - */ - -#define hweight32(x) generic_hweight32(x) -#define hweight16(x) generic_hweight16(x) -#define hweight8(x) generic_hweight8(x) - -#define ext2_set_bit __test_and_set_bit -#define ext2_clear_bit __test_and_clear_bit -#define ext2_test_bit test_bit -#define ext2_find_first_zero_bit find_first_zero_bit -#define ext2_find_next_zero_bit find_next_zero_bit - -/* Bitmap functions for the minix filesystem. 
*/ -#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) __set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) -#define minix_test_bit(nr,addr) test_bit(nr,addr) -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) - -#endif /* _I386_BITOPS_H */ diff --git a/xen/include/asm-i386/cache.h b/xen/include/asm-i386/cache.h deleted file mode 100644 index db954a06ed..0000000000 --- a/xen/include/asm-i386/cache.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * include/asm-i386/cache.h - */ -#ifndef __ARCH_I386_CACHE_H -#define __ARCH_I386_CACHE_H - -#include - -/* L1 cache line size */ -#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - -#endif diff --git a/xen/include/asm-i386/config.h b/xen/include/asm-i386/config.h deleted file mode 100644 index bd3532e773..0000000000 --- a/xen/include/asm-i386/config.h +++ /dev/null @@ -1,146 +0,0 @@ -/****************************************************************************** - * config.h - * - * A Linux-style configuration list. 
- */ - -#ifndef __XEN_I386_CONFIG_H__ -#define __XEN_I386_CONFIG_H__ - -#define CONFIG_X86 1 - -#define CONFIG_SMP 1 -#define CONFIG_X86_LOCAL_APIC 1 -#define CONFIG_X86_IO_APIC 1 -#define CONFIG_X86_L1_CACHE_SHIFT 5 - -#define CONFIG_ACPI 1 -#define CONFIG_ACPI_BOOT 1 - -#define CONFIG_PCI 1 -#define CONFIG_PCI_BIOS 1 -#define CONFIG_PCI_DIRECT 1 - -#define CONFIG_IDE 1 -#define CONFIG_BLK_DEV_IDE 1 -#define CONFIG_BLK_DEV_IDEDMA 1 -#define CONFIG_BLK_DEV_IDEPCI 1 -#define CONFIG_IDEDISK_MULTI_MODE 1 -#define CONFIG_IDEDISK_STROKE 1 -#define CONFIG_IDEPCI_SHARE_IRQ 1 -#define CONFIG_BLK_DEV_IDEDMA_PCI 1 -#define CONFIG_IDEDMA_PCI_AUTO 1 -#define CONFIG_IDEDMA_AUTO 1 -#define CONFIG_IDEDMA_ONLYDISK 1 -#define CONFIG_BLK_DEV_IDE_MODES 1 -#define CONFIG_BLK_DEV_PIIX 1 - -#define CONFIG_SCSI 1 -#define CONFIG_SCSI_LOGGING 1 -#define CONFIG_BLK_DEV_SD 1 -#define CONFIG_SD_EXTRA_DEVS 40 -#define CONFIG_SCSI_MULTI_LUN 1 - -#define CONFIG_XEN_ATTENTION_KEY 1 - - -#define HZ 100 - -/* - * Just to keep compiler happy. - * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!! - * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-) - * Mmmm... so niiiiiice.... - */ -#define SMP_CACHE_BYTES 64 -#define NR_CPUS 16 -#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#define ____cacheline_aligned __cacheline_aligned - -/*** Hypervisor owns top 64MB of virtual address space. ***/ -#define HYPERVISOR_VIRT_START (0xFC000000UL) - -/* - * First 4MB are mapped read-only for all. It's for the machine->physical - * mapping table (MPT table). The following are virtual addresses. - */ -#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START) -#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (4*1024*1024)) -/* - * Next 12MB is fixed monitor space, which is part of a 40MB direct-mapped - * memory region. The following are machine addresses. 
- */ -#define MAX_MONITOR_ADDRESS (12*1024*1024) -#define MAX_DIRECTMAP_ADDRESS (40*1024*1024) -/* And the virtual addresses for the direct-map region... */ -#define DIRECTMAP_VIRT_START (READONLY_MPT_VIRT_END) -#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS) -#define MONITOR_VIRT_START (DIRECTMAP_VIRT_START) -#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS) -#define RDWR_MPT_VIRT_START (MONITOR_VIRT_END) -#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024)) -#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END) -#define FRAMETABLE_VIRT_END (DIRECTMAP_VIRT_END) -/* Next 4MB of virtual address space is used as a linear p.t. mapping. */ -#define LINEAR_PT_VIRT_START (DIRECTMAP_VIRT_END) -#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (4*1024*1024)) -/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */ -#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END) -#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + (4*1024*1024)) -/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */ -#define PERDOMAIN_VIRT_START (SH_LINEAR_PT_VIRT_END) -#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024)) -#define GDT_VIRT_START (PERDOMAIN_VIRT_START) -#define GDT_VIRT_END (GDT_VIRT_START + (64*1024)) -#define LDT_VIRT_START (GDT_VIRT_END) -#define LDT_VIRT_END (LDT_VIRT_START + (64*1024)) -/* Penultimate 4MB of virtual address space used for domain page mappings. */ -#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END) -#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024)) -/* Final 4MB of virtual address space used for ioremap(). */ -#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END) -#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024)) - -/* - * Amount of slack domain memory to leave in system, in megabytes. - * Prevents a hard out-of-memory crunch for thinsg like network receive. 
- */ -#define SLACK_DOMAIN_MEM_KILOBYTES 2048 - -/* Linkage for x86 */ -#define FASTCALL(x) x __attribute__((regparm(3))) -#define asmlinkage __attribute__((regparm(0))) -#define __ALIGN .align 16,0x90 -#define __ALIGN_STR ".align 16,0x90" -#define SYMBOL_NAME_STR(X) #X -#define SYMBOL_NAME(X) X -#define SYMBOL_NAME_LABEL(X) X##: -#ifdef __ASSEMBLY__ -#define ALIGN __ALIGN -#define ALIGN_STR __ALIGN_STR -#define ENTRY(name) \ - .globl SYMBOL_NAME(name); \ - ALIGN; \ - SYMBOL_NAME_LABEL(name) -#endif - -#define PGT_base_page_table PGT_l2_page_table - -#define barrier() __asm__ __volatile__("": : :"memory") - -#define __HYPERVISOR_CS 0x0808 -#define __HYPERVISOR_DS 0x0810 - -#define NR_syscalls 256 - -#ifndef NDEBUG -#define MEMORY_GUARD -#endif - -#ifndef __ASSEMBLY__ -extern unsigned long _end; /* standard ELF symbol */ -extern void __out_of_line_bug(int line) __attribute__((noreturn)); -#define out_of_line_bug() __out_of_line_bug(__LINE__) -#endif /* __ASSEMBLY__ */ - -#endif /* __XEN_I386_CONFIG_H__ */ diff --git a/xen/include/asm-i386/cpufeature.h b/xen/include/asm-i386/cpufeature.h deleted file mode 100644 index f78a86891e..0000000000 --- a/xen/include/asm-i386/cpufeature.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * cpufeature.h - * - * Defines x86 CPU feature bits - */ - -#ifndef __ASM_I386_CPUFEATURE_H -#define __ASM_I386_CPUFEATURE_H - -/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ -#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) - -#define NCAPINTS 6 /* Currently we have 6 32-bit words worth of info */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0*32+ 5) /* 
Model-Specific Registers, RDMSR, WRMSR */ -#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ -#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ -#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ -#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ - /* of FPU context), and CR4.OSFXSR available */ -#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ -#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ -#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ -#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ -#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ -#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ - -/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ -/* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP (1*32+19) /* MP Capable. 
*/ -#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ - -/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ - -/* Other features, Linux-defined mapping, word 3 */ -/* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ -/* cpu types for specific tunings: */ -#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ -#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ - -/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ - - -#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) -#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) - -#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) -#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) -#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) -#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) -#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) -#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_XMM2) -#define 
cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) -#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) -#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) -#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) -#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) -#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) -#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) -#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) -#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) -#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) -#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) -#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) - -#endif /* __ASM_I386_CPUFEATURE_H */ - -/* - * Local Variables: - * mode:c - * comment-column:42 - * End: - */ diff --git a/xen/include/asm-i386/current.h b/xen/include/asm-i386/current.h deleted file mode 100644 index ee5b4b8516..0000000000 --- a/xen/include/asm-i386/current.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef _I386_CURRENT_H -#define _I386_CURRENT_H - -struct task_struct; - -#define STACK_RESERVED \ - (sizeof(execution_context_t) + sizeof(struct task_struct *)) - -static inline struct task_struct * get_current(void) -{ - struct task_struct *current; - __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0" - : "=r" (current) : "0" (STACK_SIZE-4) ); - return current; -} - -#define current get_current() - -static inline void set_current(struct task_struct *p) -{ - __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)" - : : "r" (STACK_SIZE-4), "r" (p) ); -} - -static inline execution_context_t *get_execution_context(void) -{ - execution_context_t *execution_context; - __asm__ ( "andl %%esp,%0; addl %2,%0" - : "=r" (execution_context) - : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); - return execution_context; -} - -static inline unsigned long get_stack_top(void) -{ - unsigned long p; - __asm__ ( "orl %%esp,%0; andl $~3,%0" - : "=r" (p) : "0" (STACK_SIZE-4) ); - return p; -} - -#define schedule_tail(_p) \ - __asm__ 
__volatile__ ( \ - "andl %%esp,%0; addl %2,%0; movl %0,%%esp; jmp *%1" \ - : : "r" (~(STACK_SIZE-1)), \ - "r" (unlikely(is_idle_task((_p))) ? \ - continue_cpu_idle_loop : \ - continue_nonidle_task), \ - "i" (STACK_SIZE-STACK_RESERVED) ) - - -#endif /* !(_I386_CURRENT_H) */ diff --git a/xen/include/asm-i386/debugreg.h b/xen/include/asm-i386/debugreg.h deleted file mode 100644 index f0b2b06ae0..0000000000 --- a/xen/include/asm-i386/debugreg.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _I386_DEBUGREG_H -#define _I386_DEBUGREG_H - - -/* Indicate the register numbers for a number of the specific - debug registers. Registers 0-3 contain the addresses we wish to trap on */ -#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ -#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ - -#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ -#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ - -/* Define a few things for the status register. We can use this to determine - which debugging register was responsible for the trap. The other bits - are either reserved or not of interest to us. */ - -#define DR_TRAP0 (0x1) /* db0 */ -#define DR_TRAP1 (0x2) /* db1 */ -#define DR_TRAP2 (0x4) /* db2 */ -#define DR_TRAP3 (0x8) /* db3 */ - -#define DR_STEP (0x4000) /* single-step */ -#define DR_SWITCH (0x8000) /* task switch */ - -/* Now define a bunch of things for manipulating the control register. 
- The top two bytes of the control register consist of 4 fields of 4 - bits - each field corresponds to one of the four debug registers, - and indicates what types of access we trap on, and how large the data - field is that we are looking at */ - -#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ -#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ - -#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ -#define DR_RW_WRITE (0x1) -#define DR_RW_READ (0x3) - -#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ -#define DR_LEN_2 (0x4) -#define DR_LEN_4 (0xC) - -/* The low byte to the control register determine which registers are - enabled. There are 4 fields of two bits. One bit is "local", meaning - that the processor will reset the bit after a task switch and the other - is global meaning that we have to explicitly reset the bit. With linux, - you can use either one, since we explicitly zero the register when we enter - kernel mode. */ - -#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ -#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ -#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ - -#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ -#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ - -/* The second byte to the control register has a few special things. - We can slow the instruction pipeline for instructions coming via the - gdt or the ldt if we want to. 
I am not sure why this is an advantage */ - -#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ -#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ -#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ - -#endif diff --git a/xen/include/asm-i386/delay.h b/xen/include/asm-i386/delay.h deleted file mode 100644 index 9e0adb4a27..0000000000 --- a/xen/include/asm-i386/delay.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _I386_DELAY_H -#define _I386_DELAY_H - -/* - * Copyright (C) 1993 Linus Torvalds - * - * Delay routines calling functions in arch/i386/lib/delay.c - */ - -extern unsigned long ticks_per_usec; -extern void __udelay(unsigned long usecs); -#define udelay(n) __udelay(n) - -#endif /* defined(_I386_DELAY_H) */ diff --git a/xen/include/asm-i386/desc.h b/xen/include/asm-i386/desc.h deleted file mode 100644 index 780f9c8728..0000000000 --- a/xen/include/asm-i386/desc.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H - -#define LDT_ENTRY_SIZE 8 - -#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY - -#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1) - -#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) -#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) - -#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) ) - -/* - * Guest OS must provide its own code selectors, or use the one we provide. The - * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector - * value is okay. Note that checking only the RPL is insufficient: if the - * selector is poked into an interrupt, trap or call gate then the RPL is - * ignored when the gate is accessed. 
- */ -#define VALID_SEL(_s) \ - (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \ - (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ - ((_s)&4)) && \ - (((_s)&3) == 1)) -#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s)) - -/* These are bitmasks for the first 32 bits of a descriptor table entry. */ -#define _SEGMENT_TYPE (15<< 8) -#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */ -#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */ -#define _SEGMENT_P ( 1<<15) /* Segment Present */ -#define _SEGMENT_G ( 1<<23) /* Granularity */ - -#ifndef __ASSEMBLY__ -struct desc_struct { - unsigned long a,b; -}; - -extern struct desc_struct gdt_table[]; -extern struct desc_struct *idt, *gdt; - -struct Xgt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); -}; - -#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) -#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) - -extern void set_intr_gate(unsigned int irq, void * addr); -extern void set_tss_desc(unsigned int n, void *addr); - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/xen/include/asm-i386/div64.h b/xen/include/asm-i386/div64.h deleted file mode 100644 index ef915df700..0000000000 --- a/xen/include/asm-i386/div64.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __I386_DIV64 -#define __I386_DIV64 - -#define do_div(n,base) ({ \ - unsigned long __upper, __low, __high, __mod; \ - asm("":"=a" (__low), "=d" (__high):"A" (n)); \ - __upper = __high; \ - if (__high) { \ - __upper = __high % (base); \ - __high = __high / (base); \ - } \ - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \ - asm("":"=A" (n):"a" (__low),"d" (__high)); \ - __mod; \ -}) - -#endif diff --git a/xen/include/asm-i386/dma.h b/xen/include/asm-i386/dma.h deleted file mode 100644 index 2d0af85306..0000000000 --- a/xen/include/asm-i386/dma.h +++ /dev/null @@ -1,298 +0,0 @@ -/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ - * 
linux/include/asm/dma.h: Defines for using and allocating dma channels. - * Written by Hennus Bergman, 1992. - * High DMA channel support & info by Hannu Savolainen - * and John Boyd, Nov. 1992. - */ - -#ifndef _ASM_DMA_H -#define _ASM_DMA_H - -#include -#include /* And spinlocks */ -#include /* need byte IO */ -#include - - -#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER -#define dma_outb outb_p -#else -#define dma_outb outb -#endif - -#define dma_inb inb - -/* - * NOTES about DMA transfers: - * - * controller 1: channels 0-3, byte operations, ports 00-1F - * controller 2: channels 4-7, word operations, ports C0-DF - * - * - ALL registers are 8 bits only, regardless of transfer size - * - channel 4 is not used - cascades 1 into 2. - * - channels 0-3 are byte - addresses/counts are for physical bytes - * - channels 5-7 are word - addresses/counts are for physical words - * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries - * - transfer count loaded to registers is 1 less than actual count - * - controller 2 offsets are all even (2x offsets for controller 1) - * - page registers for 5-7 don't use data bit 0, represent 128K pages - * - page registers for 0-3 use bit 0, represent 64K pages - * - * DMA transfers are limited to the lower 16MB of _physical_ memory. - * Note that addresses loaded into registers must be _physical_ addresses, - * not logical addresses (which may differ if paging is active). - * - * Address mapping for channels 0-3: - * - * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * P7 ... P0 A7 ... A0 A7 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Address mapping for channels 5-7: - * - * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) - * | ... | \ \ ... \ \ \ ... \ \ - * | ... | \ \ ... \ \ \ ... \ (not used) - * | ... | \ \ ... \ \ \ ... \ - * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... 
A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses - * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at - * the hardware level, so odd-byte transfers aren't possible). - * - * Transfer count (_not # bytes_) is limited to 64K, represented as actual - * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, - * and up to 128K bytes may be transferred on channels 5-7 in one operation. - * - */ - -#define MAX_DMA_CHANNELS 8 - -/* The maximum address that we can perform a DMA transfer to on this platform */ -/*#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)*/ - -/* 8237 DMA controllers */ -#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ -#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ - -/* DMA controller registers */ -#define DMA1_CMD_REG 0x08 /* command register (w) */ -#define DMA1_STAT_REG 0x08 /* status register (r) */ -#define DMA1_REQ_REG 0x09 /* request register (w) */ -#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ -#define DMA1_MODE_REG 0x0B /* mode register (w) */ -#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ -#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ -#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ -#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ -#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ - -#define DMA2_CMD_REG 0xD0 /* command register (w) */ -#define DMA2_STAT_REG 0xD0 /* status register (r) */ -#define DMA2_REQ_REG 0xD2 /* request register (w) */ -#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ -#define DMA2_MODE_REG 0xD6 /* mode register (w) */ -#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ -#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ -#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ -#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ -#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) 
*/ - -#define DMA_ADDR_0 0x00 /* DMA address registers */ -#define DMA_ADDR_1 0x02 -#define DMA_ADDR_2 0x04 -#define DMA_ADDR_3 0x06 -#define DMA_ADDR_4 0xC0 -#define DMA_ADDR_5 0xC4 -#define DMA_ADDR_6 0xC8 -#define DMA_ADDR_7 0xCC - -#define DMA_CNT_0 0x01 /* DMA count registers */ -#define DMA_CNT_1 0x03 -#define DMA_CNT_2 0x05 -#define DMA_CNT_3 0x07 -#define DMA_CNT_4 0xC2 -#define DMA_CNT_5 0xC6 -#define DMA_CNT_6 0xCA -#define DMA_CNT_7 0xCE - -#define DMA_PAGE_0 0x87 /* DMA page registers */ -#define DMA_PAGE_1 0x83 -#define DMA_PAGE_2 0x81 -#define DMA_PAGE_3 0x82 -#define DMA_PAGE_5 0x8B -#define DMA_PAGE_6 0x89 -#define DMA_PAGE_7 0x8A - -#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ -#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ -#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ - -#define DMA_AUTOINIT 0x10 - - -extern spinlock_t dma_spin_lock; - -static __inline__ unsigned long claim_dma_lock(void) -{ - unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); - return flags; -} - -static __inline__ void release_dma_lock(unsigned long flags) -{ - spin_unlock_irqrestore(&dma_spin_lock, flags); -} - -/* enable/disable a specific DMA channel */ -static __inline__ void enable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr, DMA1_MASK_REG); - else - dma_outb(dmanr & 3, DMA2_MASK_REG); -} - -static __inline__ void disable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr | 4, DMA1_MASK_REG); - else - dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); -} - -/* Clear the 'DMA Pointer Flip Flop'. - * Write 0 for LSB/MSB, 1 for MSB/LSB access. - * Use this once to initialize the FF to a known state. - * After that, keep track of it. :-) - * --- In order to do that, the DMA routines below should --- - * --- only be used while holding the DMA lock ! 
--- - */ -static __inline__ void clear_dma_ff(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(0, DMA1_CLEAR_FF_REG); - else - dma_outb(0, DMA2_CLEAR_FF_REG); -} - -/* set mode (above) for a specific DMA channel */ -static __inline__ void set_dma_mode(unsigned int dmanr, char mode) -{ - if (dmanr<=3) - dma_outb(mode | dmanr, DMA1_MODE_REG); - else - dma_outb(mode | (dmanr&3), DMA2_MODE_REG); -} - -/* Set only the page register bits of the transfer address. - * This is used for successive transfers when we know the contents of - * the lower 16 bits of the DMA current address register, but a 64k boundary - * may have been crossed. - */ -static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) -{ - switch(dmanr) { - case 0: - dma_outb(pagenr, DMA_PAGE_0); - break; - case 1: - dma_outb(pagenr, DMA_PAGE_1); - break; - case 2: - dma_outb(pagenr, DMA_PAGE_2); - break; - case 3: - dma_outb(pagenr, DMA_PAGE_3); - break; - case 5: - dma_outb(pagenr & 0xfe, DMA_PAGE_5); - break; - case 6: - dma_outb(pagenr & 0xfe, DMA_PAGE_6); - break; - case 7: - dma_outb(pagenr & 0xfe, DMA_PAGE_7); - break; - } -} - - -/* Set transfer address & page bits for specific DMA channel. - * Assumes dma flipflop is clear. - */ -static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) -{ - set_dma_page(dmanr, a>>16); - if (dmanr <= 3) { - dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - } else { - dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - } -} - - -/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for - * a specific DMA channel. - * You must ensure the parameters are valid. - * NOTE: from a manual: "the number of transfers is one more - * than the initial word count"! This is taken into account. - * Assumes dma flip-flop is clear. - * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. 
- */ -static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) -{ - count--; - if (dmanr <= 3) { - dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - } else { - dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - } -} - - -/* Get DMA residue count. After a DMA transfer, this - * should return zero. Reading this while a DMA transfer is - * still in progress will return unpredictable results. - * If called before the channel has been used, it may return 1. - * Otherwise, it returns the number of _bytes_ left to transfer. - * - * Assumes DMA flip-flop is clear. - */ -static __inline__ int get_dma_residue(unsigned int dmanr) -{ - unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE - : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; - - /* using short to get 16-bit wrap around */ - unsigned short count; - - count = 1 + dma_inb(io_port); - count += dma_inb(io_port) << 8; - - return (dmanr<=3)? count : (count<<1); -} - - -/* These are in kernel/dma.c: */ -extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ -extern void free_dma(unsigned int dmanr); /* release it again */ - -/* From PCI */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif - -#endif /* _ASM_DMA_H */ diff --git a/xen/include/asm-i386/domain_page.h b/xen/include/asm-i386/domain_page.h deleted file mode 100644 index d8cdf0b74e..0000000000 --- a/xen/include/asm-i386/domain_page.h +++ /dev/null @@ -1,29 +0,0 @@ -/****************************************************************************** - * domain_page.h - * - * Allow temporary mapping of domain page frames into Xen space. 
- */ - -#ifndef __ASM_DOMAIN_PAGE_H__ -#define __ASM_DOMAIN_PAGE_H__ - -#include -#include - -extern unsigned long *mapcache; -#define MAPCACHE_ENTRIES 1024 - -/* - * Maps a given physical address, returning corresponding virtual address. - * The entire page containing that VA is now accessible until a - * corresponding call to unmap_domain_mem(). - */ -extern void *map_domain_mem(unsigned long pa); - -/* - * Pass a VA within a page previously mapped with map_domain_mem(). - * That page will then be removed from the mapping lists. - */ -extern void unmap_domain_mem(void *va); - -#endif /* __ASM_DOMAIN_PAGE_H__ */ diff --git a/xen/include/asm-i386/fixmap.h b/xen/include/asm-i386/fixmap.h deleted file mode 100644 index fcfa97aee9..0000000000 --- a/xen/include/asm-i386/fixmap.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H - -#include -#include -#include -#include - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. 
- * - * TLB entries of such buffers will not be flushed across - * task switches. - */ - -/* - * on UP currently we will have no trace of the fixmap mechanizm, - * no page table allocations, etc. This might change in the - * future, say framebuffers for the console driver(s) could be - * fix-mapped? - */ -enum fixed_addresses { -#ifdef CONFIG_X86_LOCAL_APIC - FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ -#endif -#ifdef CONFIG_X86_IO_APIC - FIX_IO_APIC_BASE_0, - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, -#endif -#ifdef CONFIG_HIGHMEM - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif -#ifdef CONFIG_ACPI_BOOT - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif - __end_of_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - l1_pgentry_t entry); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR)) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE)) -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ -#define FIXADDR_TOP (0xffffe000UL) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. 
- */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -#endif diff --git a/xen/include/asm-i386/flushtlb.h b/xen/include/asm-i386/flushtlb.h deleted file mode 100644 index f0d4bb946c..0000000000 --- a/xen/include/asm-i386/flushtlb.h +++ /dev/null @@ -1,49 +0,0 @@ -/****************************************************************************** - * flushtlb.h - * - * TLB flushes are timestamped using a global virtual 'clock' which ticks - * on any TLB flush on any processor. - * - * Copyright (c) 2003, K A Fraser - */ - -#ifndef __FLUSHTLB_H__ -#define __FLUSHTLB_H__ - -#include - -/* - * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed. - * Therefore, if the current TLB time and a previously-read timestamp differ - * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock - * has wrapped at least once and every CPU's TLB is guaranteed to have been - * flushed meanwhile. - * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock. - */ -#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1) - -/* - * 'cpu_stamp' is the current timestamp for the CPU we are testing. - * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last - * used for a purpose that may have caused the CPU's TLB to become tainted. - */ -static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp) -{ - /* - * Why does this work? - * 1. XOR sets high-order bits determines if stamps from differing epochs. - * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'. 
- * In either case a flush is unnecessary: we therefore OR the results from - * (1) and (2), mask the high-order bits, and return the inverse. - */ - return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) & - ~TLBCLOCK_EPOCH_MASK); -} - -extern u32 tlbflush_clock; -extern u32 tlbflush_time[NR_CPUS]; - -extern void tlb_clocktick(void); -extern void new_tlbflush_clock_period(void); - -#endif /* __FLUSHTLB_H__ */ diff --git a/xen/include/asm-i386/hardirq.h b/xen/include/asm-i386/hardirq.h deleted file mode 100644 index 5b3cb77c91..0000000000 --- a/xen/include/asm-i386/hardirq.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef __ASM_HARDIRQ_H -#define __ASM_HARDIRQ_H - -#include -#include - -/* assembly code in softirq.h is sensitive to the offsets of these fields */ -typedef struct { - unsigned int __softirq_pending; - unsigned int __local_irq_count; - unsigned int __local_bh_count; - unsigned int __syscall_count; - unsigned int __nmi_count; - unsigned long idle_timestamp; -} ____cacheline_aligned irq_cpustat_t; - -#include /* Standard mappings for irq_cpustat_t above */ - -/* - * Are we in an interrupt context? Either doing bottom half - * or hardware interrupt processing? 
- */ -#define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) - -#define in_irq() (local_irq_count(smp_processor_id()) != 0) - -#ifndef CONFIG_SMP - -#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) -#define hardirq_endlock(cpu) do { } while (0) - -#define irq_enter(cpu, irq) (local_irq_count(cpu)++) -#define irq_exit(cpu, irq) (local_irq_count(cpu)--) - -#define synchronize_irq() barrier() - -#else - -#include -#include - -extern unsigned char global_irq_holder; -extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */ - -static inline int irqs_running (void) -{ - int i; - - for (i = 0; i < smp_num_cpus; i++) - if (local_irq_count(i)) - return 1; - return 0; -} - -static inline void release_irqlock(int cpu) -{ - /* if we didn't own the irq lock, just ignore.. */ - if (global_irq_holder == (unsigned char) cpu) { - global_irq_holder = NO_PROC_ID; - clear_bit(0,&global_irq_lock); - } -} - -static inline void irq_enter(int cpu, int irq) -{ - ++local_irq_count(cpu); - - smp_mb(); - - while (test_bit(0,&global_irq_lock)) { - cpu_relax(); - } -} - -static inline void irq_exit(int cpu, int irq) -{ - --local_irq_count(cpu); -} - -static inline int hardirq_trylock(int cpu) -{ - return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock); -} - -#define hardirq_endlock(cpu) do { } while (0) - -extern void synchronize_irq(void); - -#endif /* CONFIG_SMP */ - -#endif /* __ASM_HARDIRQ_H */ diff --git a/xen/include/asm-i386/hdreg.h b/xen/include/asm-i386/hdreg.h deleted file mode 100644 index 2c4ca680f6..0000000000 --- a/xen/include/asm-i386/hdreg.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * linux/include/asm-i386/hdreg.h - * - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -#ifndef __ASMi386_HDREG_H -#define __ASMi386_HDREG_H - -//typedef unsigned short ide_ioreg_t; -typedef unsigned long ide_ioreg_t; - -#endif /* __ASMi386_HDREG_H */ diff --git a/xen/include/asm-i386/i387.h 
b/xen/include/asm-i386/i387.h deleted file mode 100644 index 95a6bb6cde..0000000000 --- a/xen/include/asm-i386/i387.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * include/asm-i386/i387.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - */ - -#ifndef __ASM_I386_I387_H -#define __ASM_I386_I387_H - -#include -#include - -extern void init_fpu(void); -extern void save_init_fpu( struct task_struct *tsk ); -extern void restore_fpu( struct task_struct *tsk ); - -#define unlazy_fpu( tsk ) do { \ - if ( test_bit(PF_USEDFPU, &tsk->flags) ) \ - save_init_fpu( tsk ); \ -} while (0) - -#define clear_fpu( tsk ) do { \ - if ( test_and_clear_bit(PF_USEDFPU, &tsk->flags) ) { \ - asm volatile("fwait"); \ - stts(); \ - } \ -} while (0) - -#define load_mxcsr( val ) do { \ - unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \ - asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ -} while (0) - -#endif /* __ASM_I386_I387_H */ diff --git a/xen/include/asm-i386/ide.h b/xen/include/asm-i386/ide.h deleted file mode 100644 index e1b20f0758..0000000000 --- a/xen/include/asm-i386/ide.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * linux/include/asm-i386/ide.h - * - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -/* - * This file contains the i386 architecture specific IDE code. 
- */ - -#ifndef __ASMi386_IDE_H -#define __ASMi386_IDE_H - -#ifdef __KERNEL__ - -#include - -#ifndef MAX_HWIFS -# ifdef CONFIG_BLK_DEV_IDEPCI -#define MAX_HWIFS 10 -# else -#define MAX_HWIFS 6 -# endif -#endif - -#define ide__sti() __sti() - -static __inline__ int ide_default_irq(ide_ioreg_t base) -{ - switch (base) { - case 0x1f0: return 14; - case 0x170: return 15; - case 0x1e8: return 11; - case 0x168: return 10; - case 0x1e0: return 8; - case 0x160: return 12; - default: - return 0; - } -} - -static __inline__ ide_ioreg_t ide_default_io_base(int index) -{ - switch (index) { - case 0: return 0x1f0; - case 1: return 0x170; - case 2: return 0x1e8; - case 3: return 0x168; - case 4: return 0x1e0; - case 5: return 0x160; - default: - return 0; - } -} - -static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq) -{ - ide_ioreg_t reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_BLK_DEV_IDEPCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } -#endif /* CONFIG_BLK_DEV_IDEPCI */ -} - -typedef union { - unsigned all : 8; /* all of the bits together */ - struct { - unsigned head : 4; /* always zeros here */ - unsigned unit : 1; /* drive select number, 0 or 1 */ - unsigned bit5 : 1; /* always 1 */ - unsigned lba : 1; /* using LBA instead of CHS */ - unsigned bit7 : 1; /* always 1 */ - } b; -} select_t; - -typedef union { - unsigned all : 8; /* all of the bits 
together */ - struct { - unsigned bit0 : 1; - unsigned nIEN : 1; /* device INTRQ to host */ - unsigned SRST : 1; /* host soft reset bit */ - unsigned bit3 : 1; /* ATA-2 thingy */ - unsigned reserved456 : 3; - unsigned HOB : 1; /* 48-bit address ordering */ - } b; -} control_t; - -#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) -#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) -#define ide_check_region(from,extent) check_region((from), (extent)) -#define ide_request_region(from,extent,name) request_region((from), (extent), (name)) -#define ide_release_region(from,extent) release_region((from), (extent)) - -/* - * The following are not needed for the non-m68k ports - */ -#define ide_ack_intr(hwif) (1) -#define ide_fix_driveid(id) do {} while (0) -#define ide_release_lock(lock) do {} while (0) -#define ide_get_lock(lock, hdlr, data) do {} while (0) - -#endif /* __KERNEL__ */ - -#endif /* __ASMi386_IDE_H */ diff --git a/xen/include/asm-i386/io.h b/xen/include/asm-i386/io.h deleted file mode 100644 index f9e8cc936a..0000000000 --- a/xen/include/asm-i386/io.h +++ /dev/null @@ -1,284 +0,0 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H - -#include -#include - -#define IO_SPACE_LIMIT 0xffff - -/*#include */ - -/* - * Temporary debugging check to catch old code using - * unmapped ISA addresses. Will be removed in 2.4. 
- */ -#if CONFIG_DEBUG_IOVIRT - extern void *__io_virt_debug(unsigned long x, const char *file, int line); - extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); - #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) -//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) -#else - #define __io_virt(x) ((void *)(x)) -//#define __io_phys(x) __pa(x) -#endif - - -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline unsigned long virt_to_phys(volatile void * address) -{ - return __pa(address); -} - -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void * phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * Change "struct pfn_info" to physical address. 
- */ -#ifdef CONFIG_HIGHMEM64G -#define page_to_phys(page) ((u64)(page - frame_table) << PAGE_SHIFT) -#else -#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT) -#endif - -#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table)) -#define page_to_virt(_page) phys_to_virt(page_to_phys(_page)) - - -extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -static inline void * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} - -/* - * This one maps high address device memory and turns off caching for that area. - * it's useful if some control registers are in such an area and write combining - * or read caching is not desirable: - */ -static inline void * ioremap_nocache (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, _PAGE_PCD); -} - -extern void iounmap(void *addr); - -/* - * IO bus memory addresses are also 1:1 with the physical address - */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt -#define page_to_bus page_to_phys - -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. 
- */ - -#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) -#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) -#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) -#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) -#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define memset_io(a,b,c) memset(__io_virt(a),(b),(c)) -#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c)) -#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c)) - -/* - * ISA space is 'always mapped' on a typical x86 system, no need to - * explicitly ioremap() it. The fact that the ISA IO space is mapped - * to PAGE_OFFSET is pure coincidence - it does not mean ISA values - * are physical addresses. The following constant pointer can be - * used as the IO-area pointer (it can be iounmapped as well, so the - * analogy with PCI is quite large): - */ -#define __ISA_IO_base ((char *)(PAGE_OFFSET)) - -#define isa_readb(a) readb(__ISA_IO_base + (a)) -#define isa_readw(a) readw(__ISA_IO_base + (a)) -#define isa_readl(a) readl(__ISA_IO_base + (a)) -#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) -#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) -#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) -#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) -#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) -#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) - - -/* - * Again, i386 does not require mem IO specific function. 
- */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) -#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) - -static inline int check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -static inline int isa_check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (isa_readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) - -static inline void flush_write_buffers(void) -{ - __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); -} - -#define dma_cache_inv(_start,_size) flush_write_buffers() -#define dma_cache_wback(_start,_size) flush_write_buffers() -#define dma_cache_wback_inv(_start,_size) flush_write_buffers() - -#else - -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define dma_cache_wback_inv(_start,_size) do { } while (0) -#define flush_write_buffers() - -#endif - -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" -#else -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" -#endif - -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO -#endif - - -/* - * Talk about misusing macros.. 
- */ -#define __OUT1(s,x) \ -static inline void out##s(unsigned x value, unsigned short port) { - -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" - -#define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} - -#define __IN1(s) \ -static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; - -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" - -#define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } - -#define __INS(s) \ -static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; ins" #s \ -: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define __OUTS(s) \ -static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; outs" #s \ -: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define RETURN_TYPE unsigned char -__IN(b,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned short -__IN(w,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned int -__IN(l,"") -#undef RETURN_TYPE - -__OUT(b,"b",char) -__OUT(w,"w",short) -__OUT(l,,int) - -__INS(b) -__INS(w) -__INS(l) - -__OUTS(b) -__OUTS(w) -__OUTS(l) - -#endif diff --git a/xen/include/asm-i386/io_apic.h b/xen/include/asm-i386/io_apic.h deleted file mode 100644 index 8b94875891..0000000000 --- a/xen/include/asm-i386/io_apic.h +++ /dev/null @@ -1,167 +0,0 @@ -#ifndef __ASM_IO_APIC_H -#define __ASM_IO_APIC_H - -#include -#include - -/* - * Intel IO-APIC support for SMP and UP systems. 
- * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar - */ - -#ifdef CONFIG_X86_IO_APIC - -#define APIC_MISMATCH_DEBUG - -#define IO_APIC_BASE(idx) \ - ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) - -/* - * The structure of the IO-APIC: - */ -struct IO_APIC_reg_00 { - __u32 __reserved_2 : 14, - LTS : 1, - delivery_type : 1, - __reserved_1 : 8, - ID : 4, - __reserved_0 : 4; -} __attribute__ ((packed)); - -struct IO_APIC_reg_01 { - __u32 version : 8, - __reserved_2 : 7, - PRQ : 1, - entries : 8, - __reserved_1 : 8; -} __attribute__ ((packed)); - -struct IO_APIC_reg_02 { - __u32 __reserved_2 : 24, - arbitration : 4, - __reserved_1 : 4; -} __attribute__ ((packed)); - -struct IO_APIC_reg_03 { - __u32 boot_DT : 1, - __reserved_1 : 31; -} __attribute__ ((packed)); - -/* - * # of IO-APICs and # of IRQ routing registers - */ -extern int nr_ioapics; -extern int nr_ioapic_registers[MAX_IO_APICS]; - -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - -struct IO_APIC_route_entry { - __u32 vector : 8, - delivery_mode : 3, /* 000: FIXED - * 001: lowest prio - * 111: ExtINT - */ - dest_mode : 1, /* 0: physical, 1: logical */ - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, /* 0: edge, 1: level */ - mask : 1, /* 0: enabled, 1: disabled */ - __reserved_2 : 15; - - union { struct { __u32 - __reserved_1 : 24, - physical_dest : 4, - __reserved_2 : 4; - } physical; - - struct { __u32 - __reserved_1 : 24, - logical_dest : 8; - } logical; - } dest; - -} __attribute__ ((packed)); - -/* - * MP-BIOS irq configuration table structures: - */ - -/* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -extern int mp_irq_entries; - -/* MP IRQ source entries */ -extern struct mpc_config_intsrc *mp_irqs; - -/* 
non-0 if default (table-less) MP configuration */ -extern int mpc_default_type; - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - *IO_APIC_BASE(apic) = reg; - return *(IO_APIC_BASE(apic)+4); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - (void) *(IO_APIC_BASE(apic)+4); -} - -/* - * If we use the IO-APIC for IRQ routing, disable automatic - * assignment of PCI IRQ's. - */ -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup) - -#ifdef CONFIG_ACPI_BOOT -extern int io_apic_get_unique_id (int ioapic, int apic_id); -extern int io_apic_get_version (int ioapic); -extern int io_apic_get_redir_entries (int ioapic); -extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low); -#endif - -extern int skip_ioapic_setup; /* 1 for "noapic" */ - -static inline void disable_ioapic_setup(void) -{ - skip_ioapic_setup = 1; -} - -static inline int ioapic_setup_disabled(void) -{ - return skip_ioapic_setup; -} - -#else /* !CONFIG_X86_IO_APIC */ -#define io_apic_assign_pci_irqs 0 - -static inline void disable_ioapic_setup(void) -{ } - -#endif /* !CONFIG_X86_IO_APIC */ - -#endif diff --git a/xen/include/asm-i386/irq.h b/xen/include/asm-i386/irq.h deleted file mode 100644 index 2c7c67a0da..0000000000 --- a/xen/include/asm-i386/irq.h +++ /dev/null @@ -1,204 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */ - -#include -#include - -#define SA_INTERRUPT 0x20000000 -#define SA_SHIRQ 0x04000000 -#define SA_NOPROFILE 0x02000000 - -#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */ - -#define TIMER_IRQ 0 - -extern void disable_irq(unsigned int); -extern void 
disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x30 - -#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR) - -#define HYPERVISOR_CALL_VECTOR 0x82 - -/* - * Vectors 0x30-0x3f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define EVENT_CHECK_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb -#define KDB_VECTOR 0xfa - -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x40-0xee) - * we start at 0x41 to spread out vectors evenly between priority - * levels. 
(0x82 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x41 -#define FIRST_SYSTEM_VECTOR 0xef - -extern int irq_vector[NR_IRQS]; -#define IO_APIC_VECTOR(irq) irq_vector[irq] - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ - -extern void mask_irq(unsigned int irq); -extern void unmask_irq(unsigned int irq); -extern void disable_8259A_irq(unsigned int irq); -extern void enable_8259A_irq(unsigned int irq); -extern int i8259A_irq_pending(unsigned int irq); -extern void make_8259A_irq(unsigned int irq); -extern void init_8259A(int aeoi); -extern void FASTCALL(send_IPI_self(int vector)); -extern void init_VISWS_APIC_irqs(void); -extern void setup_IO_APIC(void); -extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void send_IPI(int dest, int vector); - -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -extern char _stext, _etext; - -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) - -#define __STR(x) #x -#define STR(x) __STR(x) - -#define SAVE_ALL \ - "cld\n\t" \ - "pushl %gs\n\t" \ - "pushl %fs\n\t" \ - "pushl %es\n\t" \ - "pushl %ds\n\t" \ - "pushl %eax\n\t" \ - "pushl %ebp\n\t" \ - "pushl %edi\n\t" \ - "pushl %esi\n\t" \ - "pushl %edx\n\t" \ - "pushl %ecx\n\t" \ - "pushl %ebx\n\t" \ - "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \ - "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" \ - "movl %edx,%fs\n\t" \ - "movl %edx,%gs\n\t" - -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -/* - * SMP has a few special interrupts for IPI messages - */ - - /* there is a second layer of macro just to get the symbolic - name for the vector evaluated. 
This change is for RTLinux */ -#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) -#define XBUILD_SMP_INTERRUPT(x,v)\ -asmlinkage void x(void); \ -asmlinkage void call_##x(void); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(x) ":\n\t" \ - "pushl $"#v"-256\n\t" \ - SAVE_ALL \ - SYMBOL_NAME_STR(call_##x)":\n\t" \ - "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ - "jmp ret_from_intr\n"); - -#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) -#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ -asmlinkage void x(struct pt_regs * regs); \ -asmlinkage void call_##x(void); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(x) ":\n\t" \ - "pushl $"#v"-256\n\t" \ - SAVE_ALL \ - "movl %esp,%eax\n\t" \ - "pushl %eax\n\t" \ - SYMBOL_NAME_STR(call_##x)":\n\t" \ - "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ - "addl $4,%esp\n\t" \ - "jmp ret_from_intr\n"); - -#define BUILD_COMMON_IRQ() \ -asmlinkage void call_do_IRQ(void); \ -__asm__( \ - "\n" __ALIGN_STR"\n" \ - "common_interrupt:\n\t" \ - SAVE_ALL \ - SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \ - "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \ - "jmp ret_from_intr\n"); - -/* - * subtle. orig_eax is used by the signal code to distinct between - * system calls and interrupted 'random user-space'. Thus we have - * to put a negative value into orig_eax here. (the problem is that - * both system calls and IRQs want to have small integer numbers in - * orig_eax, and the syscall code has won the optimization conflict ;) - * - * Subtle as a pigs ear. 
VY - */ - -#define BUILD_IRQ(nr) \ -asmlinkage void IRQ_NAME(nr); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ - "pushl $"#nr"-256\n\t" \ - "jmp common_interrupt"); - -extern unsigned long prof_cpu_mask; -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -#include - -#if defined(CONFIG_X86_IO_APIC) -static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { - if (IO_APIC_IRQ(i)) - send_IPI_self(IO_APIC_VECTOR(i)); -} -#else -static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} -#endif - -#endif /* _ASM_HW_IRQ_H */ diff --git a/xen/include/asm-i386/ldt.h b/xen/include/asm-i386/ldt.h deleted file mode 100644 index 4da2a15afc..0000000000 --- a/xen/include/asm-i386/ldt.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __ARCH_LDT_H -#define __ARCH_LDT_H - -#ifndef __ASSEMBLY__ - -static inline void load_LDT(struct task_struct *p) -{ - unsigned int cpu; - struct desc_struct *desc; - unsigned long ents; - - if ( (ents = p->mm.ldt_ents) == 0 ) - { - __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) ); - } - else - { - cpu = smp_processor_id(); - desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu); - desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1); - desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 | - ((LDT_VIRT_START&0xff0000)>>16); - __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) ); - } -} - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/xen/include/asm-i386/mc146818rtc.h b/xen/include/asm-i386/mc146818rtc.h deleted file mode 100644 index 8758528f7c..0000000000 --- a/xen/include/asm-i386/mc146818rtc.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. 
- */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - -#include -#include - -extern spinlock_t rtc_lock; /* serialize CMOS RAM access */ - -/********************************************************************** - * register summary - **********************************************************************/ -#define RTC_SECONDS 0 -#define RTC_SECONDS_ALARM 1 -#define RTC_MINUTES 2 -#define RTC_MINUTES_ALARM 3 -#define RTC_HOURS 4 -#define RTC_HOURS_ALARM 5 -/* RTC_*_alarm is always true if 2 MSBs are set */ -# define RTC_ALARM_DONT_CARE 0xC0 - -#define RTC_DAY_OF_WEEK 6 -#define RTC_DAY_OF_MONTH 7 -#define RTC_MONTH 8 -#define RTC_YEAR 9 - -/* control registers - Moto names - */ -#define RTC_REG_A 10 -#define RTC_REG_B 11 -#define RTC_REG_C 12 -#define RTC_REG_D 13 - -/********************************************************************** - * register details - **********************************************************************/ -#define RTC_FREQ_SELECT RTC_REG_A - -/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus, - * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete, - * totalling to a max high interval of 2.228 ms. - */ -# define RTC_UIP 0x80 -# define RTC_DIV_CTL 0x70 - /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */ -# define RTC_REF_CLCK_4MHZ 0x00 -# define RTC_REF_CLCK_1MHZ 0x10 -# define RTC_REF_CLCK_32KHZ 0x20 - /* 2 values for divider stage reset, others for "testing purposes only" */ -# define RTC_DIV_RESET1 0x60 -# define RTC_DIV_RESET2 0x70 - /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 
15=2Hz */ -# define RTC_RATE_SELECT 0x0F - -/**********************************************************************/ -#define RTC_CONTROL RTC_REG_B -# define RTC_SET 0x80 /* disable updates for clock setting */ -# define RTC_PIE 0x40 /* periodic interrupt enable */ -# define RTC_AIE 0x20 /* alarm interrupt enable */ -# define RTC_UIE 0x10 /* update-finished interrupt enable */ -# define RTC_SQWE 0x08 /* enable square-wave output */ -# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ -# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ -# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ - -/**********************************************************************/ -#define RTC_INTR_FLAGS RTC_REG_C -/* caution - cleared by read */ -# define RTC_IRQF 0x80 /* any of the following 3 is active */ -# define RTC_PF 0x40 -# define RTC_AF 0x20 -# define RTC_UF 0x10 - -/**********************************************************************/ -#define RTC_VALID RTC_REG_D -# define RTC_VRT 0x80 /* valid RAM and time */ -/**********************************************************************/ - -/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) - * determines if the following two #defines are needed - */ -#ifndef BCD_TO_BIN -#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) -#endif - -#ifndef BIN_TO_BCD -#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) -#endif - - -#ifndef RTC_PORT -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ -#endif - -/* - * The yet supported machines all access the RTC index register via - * an ISA port access but the way to access the date register differs ... 
- */ -#define CMOS_READ(addr) ({ \ -outb_p((addr),RTC_PORT(0)); \ -inb_p(RTC_PORT(1)); \ -}) -#define CMOS_WRITE(val, addr) ({ \ -outb_p((addr),RTC_PORT(0)); \ -outb_p((val),RTC_PORT(1)); \ -}) - -#define RTC_IRQ 8 - -#endif /* _ASM_MC146818RTC_H */ diff --git a/xen/include/asm-i386/mpspec.h b/xen/include/asm-i386/mpspec.h deleted file mode 100644 index 7a20710c2a..0000000000 --- a/xen/include/asm-i386/mpspec.h +++ /dev/null @@ -1,242 +0,0 @@ -#ifndef __ASM_MPSPEC_H -#define __ASM_MPSPEC_H - -#include -#include - -/* - * Structure definitions for SMP machines following the - * Intel Multiprocessing Specification 1.1 and 1.4. - */ - -/* - * This tag identifies where the SMP configuration - * information is. - */ - -#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') - -/* - * a maximum of 16 APICs with the current APIC ID architecture. - * xAPICs can have up to 256. SAPICs have 16 ID bits. - */ -#ifdef CONFIG_X86_CLUSTERED_APIC -#define MAX_APICS 256 -#else -#define MAX_APICS 16 -#endif - -#define MAX_MPC_ENTRY 1024 - -struct intel_mp_floating -{ - char mpf_signature[4]; /* "_MP_" */ - unsigned long mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? 
*/ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ -}; - -struct mp_config_table -{ - char mpc_signature[4]; -#define MPC_SIGNATURE "PCMP" - unsigned short mpc_length; /* Size of table */ - char mpc_spec; /* 0x01 */ - char mpc_checksum; - char mpc_oem[8]; - char mpc_productid[12]; - unsigned long mpc_oemptr; /* 0 if not present */ - unsigned short mpc_oemsize; /* 0 if not present */ - unsigned short mpc_oemcount; - unsigned long mpc_lapic; /* APIC address */ - unsigned long reserved; -}; - -/* Followed by entries */ - -#define MP_PROCESSOR 0 -#define MP_BUS 1 -#define MP_IOAPIC 2 -#define MP_INTSRC 3 -#define MP_LINTSRC 4 -#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */ - -struct mpc_config_processor -{ - unsigned char mpc_type; - unsigned char mpc_apicid; /* Local APIC number */ - unsigned char mpc_apicver; /* Its versions */ - unsigned char mpc_cpuflag; -#define CPU_ENABLED 1 /* Processor is available */ -#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ - unsigned long mpc_cpufeature; -#define CPU_STEPPING_MASK 0x0F -#define CPU_MODEL_MASK 0xF0 -#define CPU_FAMILY_MASK 0xF00 - unsigned long mpc_featureflag; /* CPUID feature value */ - unsigned long mpc_reserved[2]; -}; - -struct mpc_config_bus -{ - unsigned char mpc_type; - unsigned char mpc_busid; - unsigned char mpc_bustype[6] __attribute((packed)); -}; - -/* List of Bus Type string values, Intel MP Spec. 
*/ -#define BUSTYPE_EISA "EISA" -#define BUSTYPE_ISA "ISA" -#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ -#define BUSTYPE_MCA "MCA" -#define BUSTYPE_VL "VL" /* Local bus */ -#define BUSTYPE_PCI "PCI" -#define BUSTYPE_PCMCIA "PCMCIA" -#define BUSTYPE_CBUS "CBUS" -#define BUSTYPE_CBUSII "CBUSII" -#define BUSTYPE_FUTURE "FUTURE" -#define BUSTYPE_MBI "MBI" -#define BUSTYPE_MBII "MBII" -#define BUSTYPE_MPI "MPI" -#define BUSTYPE_MPSA "MPSA" -#define BUSTYPE_NUBUS "NUBUS" -#define BUSTYPE_TC "TC" -#define BUSTYPE_VME "VME" -#define BUSTYPE_XPRESS "XPRESS" - -struct mpc_config_ioapic -{ - unsigned char mpc_type; - unsigned char mpc_apicid; - unsigned char mpc_apicver; - unsigned char mpc_flags; -#define MPC_APIC_USABLE 0x01 - unsigned long mpc_apicaddr; -}; - -struct mpc_config_intsrc -{ - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbus; - unsigned char mpc_srcbusirq; - unsigned char mpc_dstapic; - unsigned char mpc_dstirq; -}; - -enum mp_irq_source_types { - mp_INT = 0, - mp_NMI = 1, - mp_SMI = 2, - mp_ExtINT = 3 -}; - -#define MP_IRQDIR_DEFAULT 0 -#define MP_IRQDIR_HIGH 1 -#define MP_IRQDIR_LOW 3 - - -struct mpc_config_lintsrc -{ - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbusid; - unsigned char mpc_srcbusirq; - unsigned char mpc_destapic; -#define MP_APIC_ALL 0xFF - unsigned char mpc_destapiclint; -}; - -struct mp_config_oemtable -{ - char oem_signature[4]; -#define MPC_OEM_SIGNATURE "_OEM" - unsigned short oem_length; /* Size of table */ - char oem_rev; /* 0x01 */ - char oem_checksum; - char mpc_oem[8]; -}; - -struct mpc_config_translation -{ - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -/* - * Default configurations - * - * 1 2 CPU ISA 82489DX - * 2 2 CPU EISA 82489DX neither 
IRQ 0 timer nor IRQ 13 DMA chaining - * 3 2 CPU EISA 82489DX - * 4 2 CPU MCA 82489DX - * 5 2 CPU ISA+PCI - * 6 2 CPU EISA+PCI - * 7 2 CPU MCA+PCI - */ - -#ifdef CONFIG_MULTIQUAD -#define MAX_IRQ_SOURCES 512 -#else /* !CONFIG_MULTIQUAD */ -#define MAX_IRQ_SOURCES 256 -#endif /* CONFIG_MULTIQUAD */ - -#define MAX_MP_BUSSES 32 -enum mp_bustype { - MP_BUS_ISA = 1, - MP_BUS_EISA, - MP_BUS_PCI, - MP_BUS_MCA -}; -extern int *mp_bus_id_to_type; -extern int *mp_bus_id_to_node; -extern int *mp_bus_id_to_local; -extern int *mp_bus_id_to_pci_bus; -extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; - -extern unsigned int boot_cpu_physical_apicid; -/*extern unsigned long phys_cpu_present_map;*/ -extern int smp_found_config; -extern void find_smp_config (void); -extern void get_smp_config (void); -/*extern int nr_ioapics;*/ -extern int apic_version [MAX_APICS]; -/*extern int mp_irq_entries;*/ -/*extern struct mpc_config_intsrc *mp_irqs;*/ -/*extern int mpc_default_type;*/ -extern int mp_current_pci_id; -extern unsigned long mp_lapic_addr; -/*extern int pic_mode;*/ -extern int using_apic_timer; - -#ifdef CONFIG_ACPI_BOOT -extern void mp_register_lapic (u8 id, u8 enabled); -extern void mp_register_lapic_address (u64 address); - -#ifdef CONFIG_X86_IO_APIC -extern void mp_register_ioapic (u8 id, u32 address, u32 irq_base); -extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 global_irq); -extern void mp_config_acpi_legacy_irqs (void); -extern void mp_config_ioapic_for_sci(int irq); -extern void mp_parse_prt (void); -#else /*!CONFIG_X86_IO_APIC*/ -static inline void mp_config_ioapic_for_sci(int irq) { } -#endif /*!CONFIG_X86_IO_APIC*/ - -#endif /*CONFIG_ACPI_BOOT*/ - -#endif - diff --git a/xen/include/asm-i386/msr.h b/xen/include/asm-i386/msr.h deleted file mode 100644 index 45ec765e6e..0000000000 --- a/xen/include/asm-i386/msr.h +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef __ASM_MSR_H -#define __ASM_MSR_H - -/* - * Access to machine-specific registers (available 
on 586 and better only) - * Note: the rd* operations modify the parameters directly (without using - * pointer indirection), this allows gcc to optimize better - */ - -#define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) - -#define wrmsr(msr,val1,val2) \ - __asm__ __volatile__("wrmsr" \ - : /* no outputs */ \ - : "c" (msr), "a" (val1), "d" (val2)) - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -#define rdtscl(low) \ - __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) - -#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) - -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) - -/* symbolic names for some interesting MSRs */ -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0 -#define MSR_IA32_P5_MC_TYPE 1 -#define MSR_IA32_PLATFORM_ID 0x17 -#define MSR_IA32_EBL_CR_POWERON 0x2a - -#define MSR_IA32_APICBASE 0x1b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_UCODE_WRITE 0x79 -#define MSR_IA32_UCODE_REV 0x8b - -#define MSR_IA32_BBL_CR_CTL 0x119 - -#define MSR_IA32_MCG_CAP 0x179 -#define MSR_IA32_MCG_STATUS 0x17a -#define MSR_IA32_MCG_CTL 0x17b - -#define MSR_IA32_THERM_CONTROL 0x19a -#define MSR_IA32_THERM_INTERRUPT 0x19b -#define MSR_IA32_THERM_STATUS 0x19c -#define MSR_IA32_MISC_ENABLE 0x1a0 - -#define MSR_IA32_DEBUGCTLMSR 0x1d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x1db -#define MSR_IA32_LASTBRANCHTOIP 0x1dc -#define MSR_IA32_LASTINTFROMIP 0x1dd -#define MSR_IA32_LASTINTTOIP 0x1de - -#define MSR_IA32_MC0_CTL 0x400 -#define MSR_IA32_MC0_STATUS 0x401 -#define MSR_IA32_MC0_ADDR 0x402 -#define MSR_IA32_MC0_MISC 0x403 - -#define MSR_P6_PERFCTR0 0xc1 -#define MSR_P6_PERFCTR1 0xc2 -#define MSR_P6_EVNTSEL0 0x186 -#define MSR_P6_EVNTSEL1 
0x187 - -/* AMD Defined MSRs */ -#define MSR_K6_EFER 0xC0000080 -#define MSR_K6_STAR 0xC0000081 -#define MSR_K6_WHCR 0xC0000082 -#define MSR_K6_UWCCR 0xC0000085 -#define MSR_K6_EPMR 0xC0000086 -#define MSR_K6_PSOR 0xC0000087 -#define MSR_K6_PFIR 0xC0000088 - -#define MSR_K7_EVNTSEL0 0xC0010000 -#define MSR_K7_PERFCTR0 0xC0010004 -#define MSR_K7_HWCR 0xC0010015 -#define MSR_K7_CLK_CTL 0xC001001b -#define MSR_K7_FID_VID_CTL 0xC0010041 -#define MSR_K7_VID_STATUS 0xC0010042 - -/* Centaur-Hauls/IDT defined MSRs. */ -#define MSR_IDT_FCR1 0x107 -#define MSR_IDT_FCR2 0x108 -#define MSR_IDT_FCR3 0x109 -#define MSR_IDT_FCR4 0x10a - -#define MSR_IDT_MCR0 0x110 -#define MSR_IDT_MCR1 0x111 -#define MSR_IDT_MCR2 0x112 -#define MSR_IDT_MCR3 0x113 -#define MSR_IDT_MCR4 0x114 -#define MSR_IDT_MCR5 0x115 -#define MSR_IDT_MCR6 0x116 -#define MSR_IDT_MCR7 0x117 -#define MSR_IDT_MCR_CTRL 0x120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x1107 -#define MSR_VIA_LONGHAUL 0x110a -#define MSR_VIA_BCR2 0x1147 - -/* Transmeta defined MSRs */ -#define MSR_TMTA_LONGRUN_CTRL 0x80868010 -#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -#define MSR_TMTA_LRTI_READOUT 0x80868018 -#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a - -#endif /* __ASM_MSR_H */ diff --git a/xen/include/asm-i386/page.h b/xen/include/asm-i386/page.h deleted file mode 100644 index 61996d4ccc..0000000000 --- a/xen/include/asm-i386/page.h +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef _I386_PAGE_H -#define _I386_PAGE_H - -#define BUG() do { \ - printk("BUG at %s:%d\n", __FILE__, __LINE__); \ - __asm__ __volatile__("ud2"); \ -} while (0) - -#define L1_PAGETABLE_SHIFT 12 -#define L2_PAGETABLE_SHIFT 22 - -#define ENTRIES_PER_L1_PAGETABLE 1024 -#define ENTRIES_PER_L2_PAGETABLE 1024 - -#define PAGE_SHIFT L1_PAGETABLE_SHIFT -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE) -#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE) - -#ifndef 
__ASSEMBLY__ -#include -typedef struct { unsigned long l1_lo; } l1_pgentry_t; -typedef struct { unsigned long l2_lo; } l2_pgentry_t; -typedef l1_pgentry_t *l1_pagetable_t; -typedef l2_pgentry_t *l2_pagetable_t; -typedef struct { unsigned long pt_lo; } pagetable_t; -#endif /* !__ASSEMBLY__ */ - -/* Strip type from a table entry. */ -#define l1_pgentry_val(_x) ((_x).l1_lo) -#define l2_pgentry_val(_x) ((_x).l2_lo) -#define pagetable_val(_x) ((_x).pt_lo) - -#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL)) -#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL)) - -/* Add type to a table entry. */ -#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } ) -#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } ) -#define mk_pagetable(_x) ( (pagetable_t) { (_x) } ) - -/* Turn a typed table entry into a page index. */ -#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT) -#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT) - -/* Turn a typed table entry into a physical address. */ -#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK) -#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK) - -/* Dereference a typed level-2 entry to yield a typed level-1 table. */ -#define l2_pgentry_to_l1(_x) \ - ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK)) - -/* Given a virtual address, get an entry offset into a page table. */ -#define l1_table_offset(_a) \ - (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1)) -#define l2_table_offset(_a) \ - ((_a) >> L2_PAGETABLE_SHIFT) - -/* Hypervisor table entries use zero to sugnify 'empty'. 
*/ -#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x)) -#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x)) - -#define __PAGE_OFFSET (0xFC400000) -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) -#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) -#define VALID_PAGE(page) ((page - frame_table) < max_mapnr) - -/* - * NB. We don't currently track I/O holes in the physical RAM space. - * For now we guess that I/O devices will be mapped in the first 1MB - * (e.g., VGA buffers) or beyond the end of physical RAM. - */ -#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page)) - -/* High table entries are reserved by the hypervisor. */ -#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \ - (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) -#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \ - (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE) - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include - -#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START) -#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT)))) - -#define va_to_l1mfn(_va) (l2_pgentry_val(linear_l2_table[_va>>L2_PAGETABLE_SHIFT]) >> PAGE_SHIFT) - -extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE]; -extern void paging_init(void); - -#define __flush_tlb() \ - do { \ - __asm__ __volatile__ ( \ - "movl %%cr3, %%eax; movl %%eax, %%cr3" \ - : : : "memory", "eax" ); \ - tlb_clocktick(); \ - } while ( 0 ) - -/* Flush global pages as well. 
*/ - -#define __pge_off() \ - do { \ - __asm__ __volatile__( \ - "movl %0, %%cr4; # turn off PGE " \ - :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \ - } while (0) - -#define __pge_on() \ - do { \ - __asm__ __volatile__( \ - "movl %0, %%cr4; # turn off PGE " \ - :: "r" (mmu_cr4_features)); \ - } while (0) - - -#define __flush_tlb_pge() \ - do { \ - __pge_off(); \ - __flush_tlb(); \ - __pge_on(); \ - } while (0) - -#define __flush_tlb_one(__addr) \ -__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) - -#endif /* !__ASSEMBLY__ */ - - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PAT 0x080 -#define _PAGE_PSE 0x080 -#define _PAGE_GLOBAL 0x100 - -#define __PAGE_HYPERVISOR \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_HYPERVISOR_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) -#define __PAGE_HYPERVISOR_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) - -#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL) - -#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR) -#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO) -#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE) - -#define mk_l2_writeable(_p) \ - (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW)) -#define mk_l2_readonly(_p) \ - (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW)) -#define mk_l1_writeable(_p) \ - (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW)) -#define mk_l1_readonly(_p) \ - (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW)) - - -#ifndef __ASSEMBLY__ -static __inline__ int get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} -#endif - -#endif /* _I386_PAGE_H */ diff --git a/xen/include/asm-i386/param.h 
b/xen/include/asm-i386/param.h deleted file mode 100644 index 1b10bf49fe..0000000000 --- a/xen/include/asm-i386/param.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _ASMi386_PARAM_H -#define _ASMi386_PARAM_H - -#ifndef HZ -#define HZ 100 -#endif - -#define EXEC_PAGESIZE 4096 - -#ifndef NGROUPS -#define NGROUPS 32 -#endif - -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - -#ifdef __KERNEL__ -# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ -#endif - -#endif diff --git a/xen/include/asm-i386/pci.h b/xen/include/asm-i386/pci.h deleted file mode 100644 index 34c69c7aa4..0000000000 --- a/xen/include/asm-i386/pci.h +++ /dev/null @@ -1,292 +0,0 @@ -#ifndef __i386_PCI_H -#define __i386_PCI_H - -#include - -#ifdef __KERNEL__ - -/* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ - -#ifdef CONFIG_PCI -extern unsigned int pcibios_assign_all_busses(void); -#else -#define pcibios_assign_all_busses() 0 -#endif -#define pcibios_scan_all_fns() 0 - -extern unsigned long pci_mem_start; -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM (pci_mem_start) - -void pcibios_config_init(void); -struct pci_bus * pcibios_scan_root(int bus); -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); - -void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq); -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - -/* Dynamic DMA mapping stuff. - * i386 has everything mapped statically. - */ - -#include -#include -#include -/*#include */ -#include - -struct pci_dev; - -/* The PCI address space does equal the physical memory - * address space. 
The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices, - * NULL for PCI-like buses (ISA, EISA). - * Returns non-NULL cpu-view pointer to the buffer if successful and - * sets *dma_addrp to the pci side dma address as well, else *dma_addrp - * is undefined. - */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle); - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. - * - * References to the memory and mappings associated with cpu_addr/dma_addr - * past this call are illegal. - */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. - */ -static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); - return virt_to_bus(ptr); -} - -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. 
- */ -static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* - * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical - * to pci_map_single, but takes a struct pfn_info instead of a virtual address - */ -static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page, - unsigned long offset, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - - return (dma_addr_t)(page - frame_table) * PAGE_SIZE + offset; -} - -static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* pci_unmap_{page,single} is a nop so... */ -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. 
- */ -static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - int i; - - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - - /* - * temporary 2.4 hack - */ - for (i = 0; i < nents; i++ ) { - if (sg[i].address && sg[i].page) - out_of_line_bug(); -#if 0 - /* Invalid check, since address==0 is valid. */ - else if (!sg[i].address && !sg[i].page) - out_of_line_bug(); -#endif - - /* XXX Switched round, since address==0 is valid. */ - if (sg[i].page) - sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; - else - sg[i].dma_address = virt_to_bus(sg[i].address); - } - - flush_write_buffers(); - return nents; -} - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -/* Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -static inline void pci_dma_sync_single(struct pci_dev *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. 
- */ -static inline void pci_dma_sync_sg(struct pci_dev *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if(mask < 0x00ffffff) - return 0; - - return 1; -} - -/* This is always fine. */ -#define pci_dac_dma_supported(pci_dev, mask) (1) - -static __inline__ dma64_addr_t -pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction) -{ - return ((dma64_addr_t) page_to_bus(page) + - (dma64_addr_t) offset); -} - -static __inline__ struct pfn_info * -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - unsigned long poff = (dma_addr >> PAGE_SHIFT); - - return frame_table + poff; -} - -static __inline__ unsigned long -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return (dma_addr & ~PAGE_MASK); -} - -static __inline__ void -pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ - flush_write_buffers(); -} - -/* These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -/* Return the index of the PCI controller for device. 
*/ -static inline int pci_controller_num(struct pci_dev *dev) -{ - return 0; -} - -#if 0 /* XXX Not in land of Xen XXX */ -#define HAVE_PCI_MMAP -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); -#endif - -#endif /* __KERNEL__ */ - -#endif /* __i386_PCI_H */ diff --git a/xen/include/asm-i386/pdb.h b/xen/include/asm-i386/pdb.h deleted file mode 100644 index 2ed6a9a318..0000000000 --- a/xen/include/asm-i386/pdb.h +++ /dev/null @@ -1,88 +0,0 @@ - -/* - * pervasive debugger - * www.cl.cam.ac.uk/netos/pdb - * - * alex ho - * 2004 - * university of cambridge computer laboratory - */ - - -#ifndef __PDB_H__ -#define __PDB_H__ - -#include -#include -#include -#include /* for domain id */ - -extern int pdb_initialized; -extern int pdb_com_port; -extern int pdb_high_bit; -extern int pdb_page_fault_possible; -extern int pdb_page_fault_scratch; -extern int pdb_page_fault; - -extern void initialize_pdb(void); - -/* Get/set values from generic debug interface. */ -extern int pdb_set_values(u_char *buffer, int length, - unsigned long cr3, unsigned long addr); -extern int pdb_get_values(u_char *buffer, int length, - unsigned long cr3, unsigned long addr); - -/* External entry points. */ -extern int pdb_handle_exception(int exceptionVector, - struct pt_regs *xen_regs); -extern int pdb_serial_input(u_char c, struct pt_regs *regs); -extern void pdb_do_debug(dom0_op_t *op); - -/* PDB Context. */ -struct pdb_context -{ - int valid; - int domain; - int process; - int system_call; /* 0x01 break on enter, 0x02 break on exit */ - unsigned long ptbr; -}; -extern struct pdb_context pdb_ctx; - -/* Breakpoints. 
*/ -struct pdb_breakpoint -{ - struct list_head list; - unsigned long address; - unsigned long cr3; - domid_t domain; -}; -extern void pdb_bkpt_add (unsigned long cr3, unsigned long address); -extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3, - unsigned long address); -extern int pdb_bkpt_remove (unsigned long cr3, unsigned long address); - -/* Conversions. */ -extern int hex (char); -extern char *mem2hex (char *, char *, int); -extern char *hex2mem (char *, char *, int); -extern int hexToInt (char **ptr, int *intValue); - -/* Temporary Linux specific definitions */ -extern int pdb_system_call; -extern unsigned char pdb_system_call_enter_instr; /* original enter instr */ -extern unsigned char pdb_system_call_leave_instr; /* original next instr */ -extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */ -extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */ - -unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid); -void pdb_linux_get_values(char *buffer, int length, unsigned long address, - int pid, unsigned long cr3); -void pdb_linux_set_values(char *buffer, int length, unsigned long address, - int pid, unsigned long cr3); -void pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code, - trap_info_t *ti); -void pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, - struct pdb_context *pdb_ctx); - -#endif /* __PDB_H__ */ diff --git a/xen/include/asm-i386/pgalloc.h b/xen/include/asm-i386/pgalloc.h deleted file mode 100644 index db1d6b948e..0000000000 --- a/xen/include/asm-i386/pgalloc.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef _I386_PGALLOC_H -#define _I386_PGALLOC_H - -#include -#include -#include -#include - -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist (current_cpu_data.pmd_quick) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - - -/* - * Allocate and free page tables. 
- */ - - -#define pte_free(pte) pte_free_fast(pte) -#define pgd_alloc(mm) get_pgd_fast() -#define pgd_free(pgd) free_pgd_fast(pgd) - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - * (In the PAE case we free the pmds as part of the pgd.) - */ - -#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free_slow(x) do { } while (0) -#define pmd_free_fast(x) do { } while (0) -#define pmd_free(x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables - * - * ..but the i386 has somewhat limited tlb flushing capabilities, - * and page-granular flushes are available only on i486 and up. - */ - -#ifndef CONFIG_SMP - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb() -#define flush_tlb_all_pge() __flush_tlb_pge() -#define local_flush_tlb() __flush_tlb() -#define flush_tlb_cpu(_cpu) __flush_tlb() -#define flush_tlb_mask(_mask) __flush_tlb() -#define try_flush_tlb_mask(_mask) __flush_tlb() - -#else - -#include - -extern int try_flush_tlb_mask(unsigned long mask); -extern void flush_tlb_mask(unsigned long mask); -extern void flush_tlb_all_pge(void); - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1) -#define local_flush_tlb() __flush_tlb() -#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu)) - -#endif - -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ -} - -#endif /* _I386_PGALLOC_H */ diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h deleted file mode 100644 index 823c6ca851..0000000000 --- 
a/xen/include/asm-i386/processor.h +++ /dev/null @@ -1,563 +0,0 @@ -/* - * include/asm-i386/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_I386_PROCESSOR_H -#define __ASM_I386_PROCESSOR_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct task_struct; - -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() \ - ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - char wp_works_ok; /* It doesn't on 386's */ - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ - char hard_math; - char rfu; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this - call */ - int fdiv_bug; - int f00f_bug; - int coma_bug; - unsigned long loops_per_jiffy; - unsigned long *pgd_quick; - unsigned long *pmd_quick; - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_NSC 8 -#define X86_VENDOR_SIS 9 -#define X86_VENDOR_UNKNOWN 0xff - -/* - * capabilities of CPUs - */ - -extern struct cpuinfo_x86 boot_cpu_data; -extern struct tss_struct init_tss[NR_CPUS]; - -#ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data 
cpu_data[smp_processor_id()] -#else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data -#endif - -extern char ignore_irq13; - -extern void identify_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); -extern void dodgy_tsc(void); - -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* - * Generic CPUID function - */ -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - 
: "bx", "dx" ); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - - -/* - * Intel CPU flags in CR0 - */ -#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */ -#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */ -#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */ -#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */ -#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */ -#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */ -#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */ -#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */ -#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */ -#define X86_CR0_PG 0x80000000 /* Paging (RW) */ - -#define read_cr0() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr0,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) - -#define write_cr0(x) \ - __asm__("movl %0,%%cr0": :"r" (x)); - - -/* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -/* - * Save the cr4 feature set we're using (ie - * Pentium 4MB enable and PPro Global page - * enable), so that any CPU's that boot up - * after us can get the correct flags. 
- */ -extern unsigned long mmu_cr4_features; - -static inline void set_in_cr4 (unsigned long mask) -{ - mmu_cr4_features |= mask; - __asm__("movl %%cr4,%%eax\n\t" - "orl %0,%%eax\n\t" - "movl %%eax,%%cr4\n" - : : "irg" (mask) - :"ax"); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - mmu_cr4_features &= ~mask; - __asm__("movl %%cr4,%%eax\n\t" - "andl %0,%%eax\n\t" - "movl %%eax,%%cr4\n" - : : "irg" (~mask) - :"ax"); -} - - - -/* - * Cyrix CPU configuration register indexes - */ -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* - * Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - -#define EISA_bus (0) -#define MCA_bus (0) - -/* from system description table in BIOS. Mostly for MCA use, but -others may find it useful. */ -extern unsigned int machine_id; -extern unsigned int machine_submodel_id; -extern unsigned int BIOS_revision; -extern unsigned int mca_pentium_flag; - -/* - * User space process size: 3GB (default). - */ -#define TASK_SIZE (PAGE_OFFSET) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) - -/* - * Size of io_bitmap in longwords: - * For Xen we support the full 8kbyte IO bitmap but use the io_bitmap_sel field - * of the task_struct to avoid a full 8kbyte copy when switching to / from - * domains with bits cleared. 
- */ -#define IO_BITMAP_SIZE 2048 -#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 - -struct i387_fsave_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - long status; /* software status information */ -}; - -struct i387_fxsave_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fop; - long fip; - long fcs; - long foo; - long fos; - long mxcsr; - long reserved; - long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ - long padding[56]; -} __attribute__ ((aligned (16))); - -struct i387_soft_struct { - long cwd; - long swd; - long twd; - long fip; - long fcs; - long foo; - long fos; - long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ - unsigned char ftop, changed, lookahead, no_update, rm, alimit; - struct info *info; - unsigned long entry_eip; -}; - -union i387_union { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; -}; - -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct tss_struct { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; - unsigned long esp1; - unsigned short ss1,__ss1h; - unsigned long esp2; - unsigned short ss2,__ss2h; - unsigned long __cr3; - unsigned long eip; - unsigned long eflags; - unsigned long eax,ecx,edx,ebx; - unsigned long esp; - unsigned long ebp; - unsigned long esi; - unsigned long edi; - unsigned short es, __esh; - unsigned short cs, __csh; - unsigned short ss, __ssh; - unsigned short ds, __dsh; - unsigned short fs, __fsh; - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, bitmap; - unsigned long io_bitmap[IO_BITMAP_SIZE+1]; - /* - * pads the TSS to be 
cacheline-aligned (total size is 0x2080) - */ - unsigned long __cacheline_filler[5]; -}; - -struct thread_struct { - unsigned long guestos_sp, guestos_ss; -/* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* floating point info */ - union i387_union i387; -/* Trap info. */ - int fast_trap_idx; - struct desc_struct fast_trap_desc; - trap_info_t traps[256]; -}; - -#define IDT_ENTRIES 256 -extern struct desc_struct idt_table[]; -extern struct desc_struct *idt_tables[]; - -#define SET_DEFAULT_FAST_TRAP(_p) \ - (_p)->fast_trap_idx = 0x20; \ - (_p)->fast_trap_desc.a = 0; \ - (_p)->fast_trap_desc.b = 0; - -#define CLEAR_FAST_TRAP(_p) \ - (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ - 0, 8)) - -#ifdef XEN_DEBUGGER -#define SET_FAST_TRAP(_p) \ - (pdb_initialized ? (void *) 0 : \ - (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ - &((_p)->fast_trap_desc), 8))) -#else -#define SET_FAST_TRAP(_p) \ - (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ - &((_p)->fast_trap_desc), 8)) -#endif - -long set_fast_trap(struct task_struct *p, int idx); - -#define INIT_THREAD { \ - 0, 0, \ - { [0 ... 7] = 0 }, /* debugging registers */ \ - { { 0, }, }, /* 387 state */ \ - 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \ - { {0} } /* io permissions */ \ -} - -#define INIT_TSS { \ - 0,0, /* back_link, __blh */ \ - 0, /* esp0 */ \ - 0, 0, /* ss0 */ \ - 0,0,0,0,0,0, /* stack1, stack2 */ \ - 0, /* cr3 */ \ - 0,0, /* eip,eflags */ \ - 0,0,0,0, /* eax,ecx,edx,ebx */ \ - 0,0,0,0, /* esp,ebp,esi,edi */ \ - 0,0,0,0,0,0, /* es,cs,ss */ \ - 0,0,0,0,0,0, /* ds,fs,gs */ \ - 0,0, /* ldt */ \ - 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ - { [0 ... IO_BITMAP_SIZE] = ~0UL }, /* ioperm */ \ -} - -struct mm_struct { - /* - * Every domain has a L1 pagetable of its own. Per-domain mappings - * are put in this table (eg. the current GDT is mapped here). 
- */ - l1_pgentry_t *perdomain_pt; - pagetable_t pagetable; - - /* shadow mode status and controls */ - unsigned int shadow_mode; /* flags to control shadow table operation */ - pagetable_t shadow_table; - spinlock_t shadow_lock; - unsigned int shadow_max_page_count; // currently unused - - /* shadow hashtable */ - struct shadow_status *shadow_ht; - struct shadow_status *shadow_ht_free; - struct shadow_status *shadow_ht_extras; /* extra allocation units */ - unsigned int shadow_extras_count; - - /* shadow dirty bitmap */ - unsigned long *shadow_dirty_bitmap; - unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */ - - /* shadow mode stats */ - unsigned int shadow_page_count; - unsigned int shadow_fault_count; - unsigned int shadow_dirty_count; - - - /* Current LDT details. */ - unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt; - /* Next entry is passed to LGDT on domain switch. */ - char gdt[6]; -}; - -static inline void write_ptbase(struct mm_struct *mm) -{ - unsigned long pa; - - if ( unlikely(mm->shadow_mode) ) - pa = pagetable_val(mm->shadow_table); - else - pa = pagetable_val(mm->pagetable); - - __asm__ __volatile__ ( "movl %0, %%cr3" : : "r" (pa) : "memory" ); -} - -#define IDLE0_MM \ -{ \ - perdomain_pt: 0, \ - pagetable: mk_pagetable(__pa(idle_pg_table)) \ -} - -/* Convenient accessor for mm.gdt. 
*/ -#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e)) -#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a)) -#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0))) -#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2))) - -long set_gdt(struct task_struct *p, - unsigned long *frames, - unsigned int entries); - -long set_debugreg(struct task_struct *p, int reg, unsigned long value); - -struct microcode { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int reserved[5]; - unsigned int bits[500]; -}; - -/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ -#define MICROCODE_IOCFREE _IO('6',0) - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop"); -} - -#define cpu_relax() rep_nop() - -/* Prefetch instructions for Pentium III and AMD Athlon */ -#ifdef CONFIG_MPENTIUMIII - -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} - -#elif CONFIG_X86_USE_3DNOW - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} - -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif - -#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h deleted file mode 100644 index 26269afcb0..0000000000 --- a/xen/include/asm-i386/ptrace.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _I386_PTRACE_H -#define _I386_PTRACE_H - -struct pt_regs { - long ebx; - long ecx; - long edx; - long esi; - long edi; - long ebp; - long eax; - int xds; - int xes; - int xfs; - int 
xgs; - long orig_eax; - long eip; - int xcs; - long eflags; - long esp; - int xss; -}; - -enum EFLAGS { - EF_CF = 0x00000001, - EF_PF = 0x00000004, - EF_AF = 0x00000010, - EF_ZF = 0x00000040, - EF_SF = 0x00000080, - EF_TF = 0x00000100, - EF_IE = 0x00000200, - EF_DF = 0x00000400, - EF_OF = 0x00000800, - EF_IOPL = 0x00003000, - EF_IOPL_RING0 = 0x00000000, - EF_IOPL_RING1 = 0x00001000, - EF_IOPL_RING2 = 0x00002000, - EF_NT = 0x00004000, /* nested task */ - EF_RF = 0x00010000, /* resume */ - EF_VM = 0x00020000, /* virtual mode */ - EF_AC = 0x00040000, /* alignment */ - EF_VIF = 0x00080000, /* virtual interrupt */ - EF_VIP = 0x00100000, /* virtual interrupt pending */ - EF_ID = 0x00200000, /* id */ -}; - -#ifdef __KERNEL__ -#define user_mode(regs) ((3 & (regs)->xcs)) -#endif - -#endif diff --git a/xen/include/asm-i386/rwlock.h b/xen/include/asm-i386/rwlock.h deleted file mode 100644 index 9475419f95..0000000000 --- a/xen/include/asm-i386/rwlock.h +++ /dev/null @@ -1,83 +0,0 @@ -/* include/asm-i386/rwlock.h - * - * Helpers used by both rw spinlocks and rw semaphores. - * - * Based in part on code from semaphore.h and - * spinlock.h Copyright 1996 Linus Torvalds. - * - * Copyright 1999 Red Hat, Inc. - * - * Written by Benjamin LaHaise. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#ifndef _ASM_I386_RWLOCK_H -#define _ASM_I386_RWLOCK_H - -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __build_read_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $1,(%0)\n\t" \ - "js 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - ".previous" \ - ::"a" (rw) : "memory") - -#define __build_read_lock_const(rw, helper) \ - asm volatile(LOCK "subl $1,%0\n\t" \ - "js 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ - "call " helper "\n\t" \ - "popl %%eax\n\t" \ - "jmp 1b\n" \ - ".previous" \ - :"=m" (*(volatile int *)rw) : : "memory") - -#define __build_read_lock(rw, helper) do { \ - if (__builtin_constant_p(rw)) \ - __build_read_lock_const(rw, helper); \ - else \ - __build_read_lock_ptr(rw, helper); \ - } while (0) - -#define __build_write_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - ".previous" \ - ::"a" (rw) : "memory") - -#define __build_write_lock_const(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ - "call " helper "\n\t" \ - "popl %%eax\n\t" \ - "jmp 1b\n" \ - ".previous" \ - :"=m" (*(volatile int *)rw) : : "memory") - -#define __build_write_lock(rw, helper) do { \ - if (__builtin_constant_p(rw)) \ - __build_write_lock_const(rw, helper); \ - else \ - __build_write_lock_ptr(rw, helper); \ - } while (0) - -#endif diff --git a/xen/include/asm-i386/scatterlist.h b/xen/include/asm-i386/scatterlist.h deleted file mode 100644 index 9d858415db..0000000000 --- a/xen/include/asm-i386/scatterlist.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _I386_SCATTERLIST_H -#define _I386_SCATTERLIST_H - -struct scatterlist { - char * address; /* Location data 
is to be transferred to, NULL for - * highmem page */ - struct pfn_info * page; /* Location for highmem page, if any */ - unsigned int offset;/* for highmem, page offset */ - - dma_addr_t dma_address; - unsigned int length; -}; - -#define ISA_DMA_THRESHOLD (0x00ffffff) - -#endif /* !(_I386_SCATTERLIST_H) */ diff --git a/xen/include/asm-i386/smp.h b/xen/include/asm-i386/smp.h deleted file mode 100644 index b48bbae43c..0000000000 --- a/xen/include/asm-i386/smp.h +++ /dev/null @@ -1,115 +0,0 @@ -#ifndef __ASM_SMP_H -#define __ASM_SMP_H - -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#ifndef __ASSEMBLY__ -#include -/*#include */ -#include -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -#ifndef __ASSEMBLY__ -#include -#include -#include -#ifdef CONFIG_X86_IO_APIC -#include -#endif -#include -#endif -#endif - -#ifdef CONFIG_SMP -#ifndef __ASSEMBLY__ - -/* - * Private routines/data - */ - -extern void smp_alloc_memory(void); -extern unsigned long phys_cpu_present_map; -extern unsigned long cpu_online_map; -extern volatile unsigned long smp_invalidate_needed; -extern int pic_mode; -extern int smp_num_siblings; -extern int cpu_sibling_map[]; - -extern void smp_flush_tlb(void); -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); -extern void smp_send_reschedule(int cpu); -extern void smp_invalidate_rcv(void); /* Process an NMI */ -extern void (*mtrr_hook) (void); -extern void zap_low_mappings (void); - -/* - * On x86 all CPUs are mapped 1:1 to the APIC space. - * This simplifies scheduling and IPI sending and - * compresses data structures. - */ -static inline int cpu_logical_map(int cpu) -{ - return cpu; -} -static inline int cpu_number_map(int cpu) -{ - return cpu; -} - -/* - * Some lowlevel functions might want to know about - * the real APIC ID <-> CPU # mapping. 
- */ -#define MAX_APICID 256 -extern volatile int cpu_to_physical_apicid[NR_CPUS]; -extern volatile int physical_apicid_to_cpu[MAX_APICID]; -extern volatile int cpu_to_logical_apicid[NR_CPUS]; -extern volatile int logical_apicid_to_cpu[MAX_APICID]; - -/* - * General functions that each host system must provide. - */ - -/*extern void smp_boot_cpus(void);*/ -extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ - -/* - * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. - */ - -#define smp_processor_id() (current->processor) - -static __inline int hard_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); -} - -static __inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); -} - -#endif /* !__ASSEMBLY__ */ - -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. 
- */ - -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ - -#endif -#endif diff --git a/xen/include/asm-i386/smpboot.h b/xen/include/asm-i386/smpboot.h deleted file mode 100644 index 7a0b157114..0000000000 --- a/xen/include/asm-i386/smpboot.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef __ASM_SMPBOOT_H -#define __ASM_SMPBOOT_H - -/*emum for clustered_apic_mode values*/ -enum{ - CLUSTERED_APIC_NONE = 0, - CLUSTERED_APIC_XAPIC, - CLUSTERED_APIC_NUMAQ -}; - -#ifdef CONFIG_X86_CLUSTERED_APIC -extern unsigned int apic_broadcast_id; -extern unsigned char clustered_apic_mode; -extern unsigned char esr_disable; -extern unsigned char int_delivery_mode; -extern unsigned int int_dest_addr_mode; -extern int cyclone_setup(char*); - -static inline void detect_clustered_apic(char* oem, char* prod) -{ - /* - * Can't recognize Summit xAPICs at present, so use the OEM ID. - */ - if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){ - clustered_apic_mode = CLUSTERED_APIC_XAPIC; - apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; - int_dest_addr_mode = APIC_DEST_PHYSICAL; - int_delivery_mode = dest_Fixed; - esr_disable = 1; - /*Start cyclone clock*/ - cyclone_setup(0); - /* check for ACPI tables */ - } else if (!strncmp(oem, "IBM", 3) && - (!strncmp(prod, "SERVIGIL", 8) || - !strncmp(prod, "EXA", 3) || - !strncmp(prod, "RUTHLESS", 8))){ - clustered_apic_mode = CLUSTERED_APIC_XAPIC; - apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; - int_dest_addr_mode = APIC_DEST_PHYSICAL; - int_delivery_mode = dest_Fixed; - esr_disable = 1; - /*Start cyclone clock*/ - cyclone_setup(0); - } else if (!strncmp(oem, "IBM NUMA", 8)){ - clustered_apic_mode = CLUSTERED_APIC_NUMAQ; - apic_broadcast_id = APIC_BROADCAST_ID_APIC; - int_dest_addr_mode = APIC_DEST_LOGICAL; - int_delivery_mode = dest_LowestPrio; - esr_disable = 1; - } -} -#define INT_DEST_ADDR_MODE (int_dest_addr_mode) -#define INT_DELIVERY_MODE (int_delivery_mode) -#else /* CONFIG_X86_CLUSTERED_APIC */ -#define apic_broadcast_id 
(APIC_BROADCAST_ID_APIC) -#define clustered_apic_mode (CLUSTERED_APIC_NONE) -#define esr_disable (0) -#define detect_clustered_apic(x,y) -#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */ -#define INT_DELIVERY_MODE (dest_LowestPrio) -#endif /* CONFIG_X86_CLUSTERED_APIC */ -#define BAD_APICID 0xFFu - -#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467) -#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469) - -#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid) - -extern unsigned char raw_phys_apicid[NR_CPUS]; - -/* - * How to map from the cpu_present_map - */ -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (clustered_apic_mode == CLUSTERED_APIC_XAPIC) - return raw_phys_apicid[mps_cpu]; - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) - return (mps_cpu/4)*16 + (1<<(mps_cpu%4)); - return mps_cpu; -} - -static inline unsigned long apicid_to_phys_cpu_present(int apicid) -{ - if(clustered_apic_mode) - return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3)); - return 1UL << apicid; -} - -#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) ) - -/* - * Mappings between logical cpu number and logical / physical apicid - * The first four macros are trivial, but it keeps the abstraction consistent - */ -extern volatile int logical_apicid_2_cpu[]; -extern volatile int cpu_2_logical_apicid[]; -extern volatile int physical_apicid_2_cpu[]; -extern volatile int cpu_2_physical_apicid[]; - -#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] -#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu] -#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] -#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu] -#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */ -#define boot_apicid_to_cpu(apicid) 
logical_apicid_2_cpu[apicid] -#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu] -#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */ -#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] -#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu] -#endif /* CONFIG_MULTIQUAD */ - -#ifdef CONFIG_X86_CLUSTERED_APIC -static inline int target_cpus(void) -{ - static int cpu; - switch(clustered_apic_mode){ - case CLUSTERED_APIC_NUMAQ: - /* Broadcast intrs to local quad only. */ - return APIC_BROADCAST_ID_APIC; - case CLUSTERED_APIC_XAPIC: - /*round robin the interrupts*/ - cpu = (cpu+1)%smp_num_cpus; - return cpu_to_physical_apicid(cpu); - default: - } - return cpu_online_map; -} -#else -#define target_cpus() (cpu_online_map) -#endif -#endif diff --git a/xen/include/asm-i386/softirq.h b/xen/include/asm-i386/softirq.h deleted file mode 100644 index 292baac6ea..0000000000 --- a/xen/include/asm-i386/softirq.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __ASM_SOFTIRQ_H -#define __ASM_SOFTIRQ_H - -#include -#include - -#define cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) -#define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) - -#define local_bh_disable() cpu_bh_disable(smp_processor_id()) -#define local_bh_enable() cpu_bh_enable(smp_processor_id()) - -#define in_softirq() (local_bh_count(smp_processor_id()) != 0) - -#endif /* __ASM_SOFTIRQ_H */ diff --git a/xen/include/asm-i386/spinlock.h b/xen/include/asm-i386/spinlock.h deleted file mode 100644 index d632b2139f..0000000000 --- a/xen/include/asm-i386/spinlock.h +++ /dev/null @@ -1,205 +0,0 @@ -#ifndef __ASM_SPINLOCK_H -#define __ASM_SPINLOCK_H - -#include -#include -#include -#include - -#if 0 -#define SPINLOCK_DEBUG 1 -#else -#define SPINLOCK_DEBUG 0 -#endif - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - */ - -typedef struct { - volatile unsigned int lock; -#if SPINLOCK_DEBUG - unsigned magic; -#endif -} 
spinlock_t; - -#define SPINLOCK_MAGIC 0xdead4ead - -#if SPINLOCK_DEBUG -#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC -#else -#define SPINLOCK_MAGIC_INIT /* */ -#endif - -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } - -#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) - -/* - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. - */ - -#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) -#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) - -#define spin_lock_string \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "js 2f\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\t" \ - "cmpb $0,%0\n\t" \ - "rep;nop\n\t" \ - "jle 2b\n\t" \ - "jmp 1b\n" \ - ".previous" - -/* - * This works. Despite all the confusion. - * (except on PPro SMP or if we are using OOSTORE) - * (PPro errata 66, 92) - */ - -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) - -#define spin_unlock_string \ - "movb $1,%0" \ - :"=m" (lock->lock) : : "memory" - - -static inline void spin_unlock(spinlock_t *lock) -{ -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); -#endif - __asm__ __volatile__( - spin_unlock_string - ); -} - -#else - -#define spin_unlock_string \ - "xchgb %b0, %1" \ - :"=q" (oldval), "=m" (lock->lock) \ - :"0" (oldval) : "memory" - -static inline void spin_unlock(spinlock_t *lock) -{ - char oldval = 1; -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); -#endif - __asm__ __volatile__( - spin_unlock_string - ); -} - -#endif - -static inline int spin_trylock(spinlock_t *lock) -{ - char oldval; - __asm__ __volatile__( - "xchgb %b0,%1" - :"=q" (oldval), "=m" (lock->lock) - :"0" (0) : "memory"); - return oldval > 0; -} - -static inline void spin_lock(spinlock_t *lock) -{ -#if SPINLOCK_DEBUG - 
__label__ here; -here: - if (lock->magic != SPINLOCK_MAGIC) { -printk("eip: %p\n", &&here); - BUG(); - } -#endif - __asm__ __volatile__( - spin_lock_string - :"=m" (lock->lock) : : "memory"); -} - - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - */ -typedef struct { - volatile unsigned int lock; -#if SPINLOCK_DEBUG - unsigned magic; -#endif -} rwlock_t; - -#define RWLOCK_MAGIC 0xdeaf1eed - -#if SPINLOCK_DEBUG -#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC -#else -#define RWLOCK_MAGIC_INIT /* */ -#endif - -#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } - -#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) - -/* - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. - * - * The inline assembly is non-obvious. Think about it. - * - * Changed to use the same technique as rw semaphores. See - * semaphore.h for details. 
-ben - */ -/* the spinlock helpers are in arch/i386/kernel/semaphore.c */ - -static inline void read_lock(rwlock_t *rw) -{ -#if SPINLOCK_DEBUG - if (rw->magic != RWLOCK_MAGIC) - BUG(); -#endif - __build_read_lock(rw, "__read_lock_failed"); -} - -static inline void write_lock(rwlock_t *rw) -{ -#if SPINLOCK_DEBUG - if (rw->magic != RWLOCK_MAGIC) - BUG(); -#endif - __build_write_lock(rw, "__write_lock_failed"); -} - -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") - -static inline int write_trylock(rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) - return 1; - atomic_add(RW_LOCK_BIAS, count); - return 0; -} - -#endif /* __ASM_SPINLOCK_H */ diff --git a/xen/include/asm-i386/string.h b/xen/include/asm-i386/string.h deleted file mode 100644 index bef20a71d5..0000000000 --- a/xen/include/asm-i386/string.h +++ /dev/null @@ -1,500 +0,0 @@ -#ifndef _I386_STRING_H_ -#define _I386_STRING_H_ - -#ifdef __KERNEL__ -#include -/* - * On a 486 or Pentium, we are better off not using the - * byte string operations. But on a 386 or a PPro the - * byte string ops are faster than doing it by hand - * (MUCH faster on a Pentium). - * - * Also, the byte strings actually work correctly. Forget - * the i486 routines for now as they may be broken.. - */ -#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486) -#include -#else - -/* - * This string-include defines all string functions as inline - * functions. Use gcc. It also assumes ds=es=data space, this should be - * normal. Most of the string-functions are rather heavily hand-optimized, - * see especially strtok,strstr,str[c]spn. They should work, but are not - * very easy to understand. Everything is done entirely within the register - * set, making the functions fast and clean. 
String instructions have been - * used through-out, making for "slightly" unclear code :-) - * - * NO Copyright (C) 1991, 1992 Linus Torvalds, - * consider these trivial functions to be PD. - */ - - -#define __HAVE_ARCH_STRCPY -static inline char * strcpy(char * dest,const char *src) -{ -int d0, d1, d2; -__asm__ __volatile__( - "1:\tlodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=&S" (d0), "=&D" (d1), "=&a" (d2) - :"0" (src),"1" (dest) : "memory"); -return dest; -} - -#define __HAVE_ARCH_STRNCPY -static inline char * strncpy(char * dest,const char *src,size_t count) -{ -int d0, d1, d2, d3; -__asm__ __volatile__( - "1:\tdecl %2\n\t" - "js 2f\n\t" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "rep\n\t" - "stosb\n" - "2:" - : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - :"0" (src),"1" (dest),"2" (count) : "memory"); -return dest; -} - -#define __HAVE_ARCH_STRCAT -static inline char * strcat(char * dest,const char * src) -{ -int d0, d1, d2, d3; -__asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "decl %1\n" - "1:\tlodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory"); -return dest; -} - -#define __HAVE_ARCH_STRNCAT -static inline char * strncat(char * dest,const char * src,size_t count) -{ -int d0, d1, d2, d3; -__asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "decl %1\n\t" - "movl %8,%3\n" - "1:\tdecl %3\n\t" - "js 2f\n\t" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %2,%2\n\t" - "stosb" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count) - : "memory"); -return dest; -} - -#define __HAVE_ARCH_STRCMP -static inline int strcmp(const char * cs,const char * ct) -{ -int d0, d1; -register int __res; -__asm__ __volatile__( - "1:\tlodsb\n\t" - "scasb\n\t" - "jne 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "xorl 
%%eax,%%eax\n\t" - "jmp 3f\n" - "2:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "3:" - :"=a" (__res), "=&S" (d0), "=&D" (d1) - :"1" (cs),"2" (ct)); -return __res; -} - -#define __HAVE_ARCH_STRNCMP -static inline int strncmp(const char * cs,const char * ct,size_t count) -{ -register int __res; -int d0, d1, d2; -__asm__ __volatile__( - "1:\tdecl %3\n\t" - "js 2f\n\t" - "lodsb\n\t" - "scasb\n\t" - "jne 3f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %%eax,%%eax\n\t" - "jmp 4f\n" - "3:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "4:" - :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - :"1" (cs),"2" (ct),"3" (count)); -return __res; -} - -#define __HAVE_ARCH_STRCHR -static inline char * strchr(const char * s, int c) -{ -int d0; -register char * __res; -__asm__ __volatile__( - "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "movl $1,%1\n" - "2:\tmovl %1,%0\n\t" - "decl %0" - :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); -return __res; -} - -#define __HAVE_ARCH_STRRCHR -static inline char * strrchr(const char * s, int c) -{ -int d0, d1; -register char * __res; -__asm__ __volatile__( - "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\ttestb %%al,%%al\n\t" - "jne 1b" - :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); -return __res; -} - -#define __HAVE_ARCH_STRLEN -static inline size_t strlen(const char * s) -{ -int d0; -register int __res; -__asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "notl %0\n\t" - "decl %0" - :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)); -return __res; -} - -static inline void * __memcpy(void * to, const void * from, size_t n) -{ -int d0, d1, d2; -__asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) 
- : "memory"); -return (to); -} - -/* - * This looks horribly ugly, but the compiler can optimize it totally, - * as the count is constant. - */ -static inline void * __constant_memcpy(void * to, const void * from, size_t n) -{ - switch (n) { - case 0: - return to; - case 1: - *(unsigned char *)to = *(const unsigned char *)from; - return to; - case 2: - *(unsigned short *)to = *(const unsigned short *)from; - return to; - case 3: - *(unsigned short *)to = *(const unsigned short *)from; - *(2+(unsigned char *)to) = *(2+(const unsigned char *)from); - return to; - case 4: - *(unsigned long *)to = *(const unsigned long *)from; - return to; - case 6: /* for Ethernet addresses */ - *(unsigned long *)to = *(const unsigned long *)from; - *(2+(unsigned short *)to) = *(2+(const unsigned short *)from); - return to; - case 8: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - return to; - case 12: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); - return to; - case 16: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); - *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); - return to; - case 20: - *(unsigned long *)to = *(const unsigned long *)from; - *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); - *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); - *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); - *(4+(unsigned long *)to) = *(4+(const unsigned long *)from); - return to; - } -#define COMMON(x) \ -__asm__ __volatile__( \ - "rep ; movsl" \ - x \ - : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ - : "0" (n/4),"1" ((long) to),"2" ((long) from) \ - : "memory"); -{ - int d0, d1, d2; - switch (n % 4) { - case 0: 
COMMON(""); return to; - case 1: COMMON("\n\tmovsb"); return to; - case 2: COMMON("\n\tmovsw"); return to; - default: COMMON("\n\tmovsw\n\tmovsb"); return to; - } -} - -#undef COMMON -} - -#define __HAVE_ARCH_MEMCPY - -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy((t),(f),(n)) : \ - __memcpy((t),(f),(n))) - - -/* - * struct_cpy(x,y), copy structure *x into (matching structure) *y. - * - * We get link-time errors if the structure sizes do not match. - * There is no runtime overhead, it's all optimized away at - * compile time. - */ -//extern void __struct_cpy_bug (void); - -/* -#define struct_cpy(x,y) \ -({ \ - if (sizeof(*(x)) != sizeof(*(y))) \ - __struct_cpy_bug; \ - memcpy(x, y, sizeof(*(x))); \ -}) -*/ - -#define __HAVE_ARCH_MEMMOVE -static inline void * memmove(void * dest,const void * src, size_t n) -{ -int d0, d1, d2; -if (dest -#include - -/* Clear and set 'TS' bit respectively */ -#define clts() __asm__ __volatile__ ("clts") -#define stts() write_cr0(X86_CR0_TS|read_cr0()) - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory"); - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - __asm__("lsll %1,%0" - :"=r" (__limit):"r" (segment)); - return __limit+1; -} - -#define nop() __asm__ __volatile__ ("nop") - -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) - -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((struct __xchg_dummy *)(x)) - - -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. 
--ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) - - -/* - * This function causes longword _o to be changed to _n at location _p. - * If this access causes a fault then we return 1, otherwise we return 0. - * If no fault occurs then _o is updated to teh value we saw at _p. If this - * is the same as the initial value of _o then _n is written to location _p. 
- */ -#define cmpxchg_user(_p,_o,_n) \ -({ \ - int _rc; \ - __asm__ __volatile__ ( \ - "1: " LOCK_PREFIX "cmpxchgl %2,%3\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $1,%1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=a" (_o), "=r" (_rc) \ - : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \ - : "memory"); \ - _rc; \ -}) - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - * - * Some non intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ - -#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#define rmb() mb() - -#ifdef CONFIG_X86_OOSTORE -#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#else -#define wmb() __asm__ __volatile__ ("": : :"memory") -#endif - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#endif - -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -/* interrupt control.. 
*/ -#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */) -#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc") -#define __cli() __asm__ __volatile__("cli": : :"memory") -#define __sti() __asm__ __volatile__("sti": : :"memory") -/* used in the idle loop; sti takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") - -/* For spinlocks etc */ -#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - -static inline int local_irq_is_enabled(void) -{ - unsigned long flags; - __save_flags(flags); - return !!(flags & (1<<9)); /* EFLAGS_IF */ -} - -#ifdef CONFIG_SMP - -extern void __global_cli(void); -extern void __global_sti(void); -extern unsigned long __global_save_flags(void); -extern void __global_restore_flags(unsigned long); -#define cli() __global_cli() -#define sti() __global_sti() -#define save_flags(x) ((x)=__global_save_flags()) -#define restore_flags(x) __global_restore_flags(x) - -#else - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) - -#endif - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -void disable_hlt(void); -void enable_hlt(void); - -#define BROKEN_ACPI_Sx 0x0001 -#define BROKEN_INIT_AFTER_S1 0x0002 - -#endif diff --git a/xen/include/asm-i386/time.h b/xen/include/asm-i386/time.h deleted file mode 100644 index ed3a15bfb2..0000000000 --- a/xen/include/asm-i386/time.h +++ /dev/null @@ -1,21 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge - 
**************************************************************************** - * - * File: time.h - * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) - * - * Environment: Xen Hypervisor - * Description: Architecture dependent definition of time variables - */ - -#ifndef _ASM_TIME_H_ -#define _ASM_TIME_H_ - -#include -#include - -typedef s64 s_time_t; /* system time */ - -#endif /* _ASM_TIME_H_ */ diff --git a/xen/include/asm-i386/timex.h b/xen/include/asm-i386/timex.h deleted file mode 100644 index 4b0a93fc87..0000000000 --- a/xen/include/asm-i386/timex.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * linux/include/asm-i386/timex.h - * - * i386 architecture timex specifications - */ -#ifndef _ASMi386_TIMEX_H -#define _ASMi386_TIMEX_H - -#include -#include - -#ifdef CONFIG_MELAN -# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */ -#else -# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -#endif - -#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ -#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ - << (SHIFT_SCALE-SHIFT_HZ)) / HZ) - -/* - * Standard way to access the cycle counter on i586+ CPUs. - * Currently only used on SMP. - * - * If you really have a SMP machine with i486 chips or older, - * compile for that, and this will just always return zero. - * That's ok, it just means that the nicer scheduling heuristics - * won't work for you. - * - * We only use the low 32 bits, and we'd simply better make sure - * that we reschedule before that wraps. Scheduling at least every - * four billion cycles just basically sounds like a good idea, - * regardless of how fast the machine is. 
- */ -typedef unsigned long long cycles_t; - -extern cycles_t cacheflush_time; - -static inline cycles_t get_cycles (void) -{ -#ifndef CONFIG_X86_TSC - return 0; -#else - unsigned long long ret; - - rdtscll(ret); - return ret; -#endif -} - -extern unsigned long cpu_khz; - -#define vxtime_lock() do {} while (0) -#define vxtime_unlock() do {} while (0) - -#endif diff --git a/xen/include/asm-i386/types.h b/xen/include/asm-i386/types.h deleted file mode 100644 index ebfaf85f68..0000000000 --- a/xen/include/asm-i386/types.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef _I386_TYPES_H -#define _I386_TYPES_H - -typedef unsigned short umode_t; - -typedef unsigned int size_t; - -/* - * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the - * header files exported to user space - */ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) -typedef __signed__ long long __s64; -typedef unsigned long long __u64; -#endif - -#include - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -#define BITS_PER_LONG 32 - -/* DMA addresses come in generic and 64-bit flavours. 
*/ - -#ifdef CONFIG_HIGHMEM -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif -typedef u64 dma64_addr_t; - -#endif diff --git a/xen/include/asm-i386/uaccess.h b/xen/include/asm-i386/uaccess.h deleted file mode 100644 index bb2616336d..0000000000 --- a/xen/include/asm-i386/uaccess.h +++ /dev/null @@ -1,600 +0,0 @@ -#ifndef __i386_UACCESS_H -#define __i386_UACCESS_H - -/* - * User space memory access functions - */ -#include -#include -#include -#include -#include - -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current->addr_limit) -#define set_fs(x) (current->addr_limit = (x)) - -#define segment_eq(a,b) ((a).seg == (b).seg) - -extern int __verify_write(const void *, unsigned long); - -#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) - -/* - * Uhhuh, this needs 33-bit arithmetic. We have a carry.. - */ -#define __range_ok(addr,size) ({ \ - unsigned long flag,sum; \ - asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ - :"=&r" (flag), "=r" (sum) \ - :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \ - flag; }) - -#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) - -static inline int verify_area(int type, const void * addr, unsigned long size) -{ - return access_ok(type,addr,size) ? 0 : -EFAULT; -} - - -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. 
No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry -{ - unsigned long insn, fixup; -}; - -/* Returns 0 if exception not found and fixup otherwise. */ -extern unsigned long search_exception_table(unsigned long); - - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the uglyness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). 
- */ - -extern void __get_user_1(void); -extern void __get_user_2(void); -extern void __get_user_4(void); - -#define __get_user_x(size,ret,x,ptr) \ - __asm__ __volatile__("call __get_user_" #size \ - :"=a" (ret),"=d" (x) \ - :"0" (ptr)) - -/* Careful: we have to cast the result to the type of the pointer for sign reasons */ -#define get_user(x,ptr) \ -({ int __ret_gu=1,__val_gu; \ - switch(sizeof (*(ptr))) { \ - case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \ - case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \ - case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \ - default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \ - /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ - /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \ - } \ - (x) = (__typeof__(*(ptr)))__val_gu; \ - __ret_gu; \ -}) - -extern void __put_user_1(void); -extern void __put_user_2(void); -extern void __put_user_4(void); -extern void __put_user_8(void); - -extern void __put_user_bad(void); - -#define put_user(x,ptr) \ - __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) - -#define __get_user(x,ptr) \ - __get_user_nocheck((x),(ptr),sizeof(*(ptr))) -#define __put_user(x,ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) - -#define __put_user_nocheck(x,ptr,size) \ -({ \ - long __pu_err; \ - __put_user_size((x),(ptr),(size),__pu_err); \ - __pu_err; \ -}) - - -#define __put_user_check(x,ptr,size) \ -({ \ - long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ - if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ - __put_user_size((x),__pu_addr,(size),__pu_err); \ - __pu_err; \ -}) - -#define __put_user_u64(x, addr, err) \ - __asm__ __volatile__( \ - "1: movl %%eax,0(%2)\n" \ - "2: movl %%edx,4(%2)\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: movl %3,%0\n" \ - " jmp 3b\n" \ - 
".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,4b\n" \ - " .long 2b,4b\n" \ - ".previous" \ - : "=r"(err) \ - : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) - -#define __put_user_size(x,ptr,size,retval) \ -do { \ - retval = 0; \ - switch (size) { \ - case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ - case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ - case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \ - case 8: __put_user_u64(x,ptr,retval); break; \ - default: __put_user_bad(); \ - } \ -} while (0) - -struct __large_struct { unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct *)(x)) - -/* - * Tell gcc we read from memory instead of writing: this is because - * we do not write to any memory gcc knows about, so there are no - * aliasing issues. - */ -#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ - __asm__ __volatile__( \ - "1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=r"(err) \ - : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) - - -#define __get_user_nocheck(x,ptr,size) \ -({ \ - long __gu_err, __gu_val; \ - __get_user_size(__gu_val,(ptr),(size),__gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ - __gu_err; \ -}) - -extern long __get_user_bad(void); - -#define __get_user_size(x,ptr,size,retval) \ -do { \ - retval = 0; \ - switch (size) { \ - case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \ - case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \ - case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \ - default: (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ - __asm__ __volatile__( \ - "1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " xor"itype" 
%"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=r"(err), ltype (x) \ - : "m"(__m(addr)), "i"(-EFAULT), "0"(err)) - - -/* - * Copy To/From Userspace - */ - -/* Generic arbitrary sized copy. */ -#define __copy_user(to,from,size) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ - : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -#define __copy_user_zeroing(to,from,size) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - "4: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ - : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -/* We let the __ versions of copy_from/to_user inline, because they're often - * used in fast paths and have only a small space overhead. - */ -static inline unsigned long -__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) -{ - __copy_user_zeroing(to,from,n); - return n; -} - -static inline unsigned long -__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) -{ - __copy_user(to,from,n); - return n; -} - - -/* Optimize just a little bit when we know the size of the move. 
*/ -#define __constant_copy_user(to, from, size) \ -do { \ - int __d0, __d1; \ - switch (size & 3) { \ - default: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - "2: shl $2,%0\n" \ - " jmp 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 1: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: shl $2,%0\n" \ - "4: incl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 2: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: shl $2,%0\n" \ - "4: addl $2,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 3: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2: movsb\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: shl $2,%0\n" \ - "5: addl $2,%0\n" \ - "6: incl %0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b\n" \ - " .long 1b,5b\n" \ - " .long 2b,6b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - } \ -} while (0) - -/* Optimize just a little bit when we know the size of the move. 
*/ -#define __constant_copy_user_zeroing(to, from, size) \ -do { \ - int __d0, __d1; \ - switch (size & 3) { \ - default: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - "2: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " jmp 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 1: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " incl %0\n" \ - " jmp 2b\n" \ - "4: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " incl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 2: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosw\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " addl $2,%0\n" \ - " jmp 2b\n" \ - "4: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosw\n" \ - " popl %%eax\n" \ - " addl $2,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 3: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: 
movsw\n" \ - "2: movsb\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosw\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " addl $3,%0\n" \ - " jmp 2b\n" \ - "5: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosw\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " addl $3,%0\n" \ - " jmp 2b\n" \ - "6: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " incl %0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b\n" \ - " .long 1b,5b\n" \ - " .long 2b,6b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - } \ -} while (0) - -unsigned long __generic_copy_to_user(void *, const void *, unsigned long); -unsigned long __generic_copy_from_user(void *, const void *, unsigned long); - -static inline unsigned long -__constant_copy_to_user(void *to, const void *from, unsigned long n) -{ - prefetch(from); - if (access_ok(VERIFY_WRITE, to, n)) - __constant_copy_user(to,from,n); - return n; -} - -static inline unsigned long -__constant_copy_from_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - __constant_copy_user_zeroing(to,from,n); - else - memset(to, 0, n); - return n; -} - -static inline unsigned long -__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n) -{ - __constant_copy_user(to,from,n); - return n; -} - -static inline unsigned long -__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n) -{ - __constant_copy_user_zeroing(to,from,n); - return n; -} - -#define copy_to_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user((to),(from),(n)) : \ - __generic_copy_to_user((to),(from),(n))) - -#define copy_from_user(to,from,n) \ - (__builtin_constant_p(n) ? 
\ - __constant_copy_from_user((to),(from),(n)) : \ - __generic_copy_from_user((to),(from),(n))) - -#define __copy_to_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user_nocheck((to),(from),(n)) : \ - __generic_copy_to_user_nocheck((to),(from),(n))) - -#define __copy_from_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user_nocheck((to),(from),(n)) : \ - __generic_copy_from_user_nocheck((to),(from),(n))) - -long strncpy_from_user(char *dst, const char *src, long count); -long __strncpy_from_user(char *dst, const char *src, long count); -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) -long strnlen_user(const char *str, long n); -unsigned long clear_user(void *mem, unsigned long len); -unsigned long __clear_user(void *mem, unsigned long len); - -#endif /* __i386_UACCESS_H */ diff --git a/xen/include/asm-i386/unaligned.h b/xen/include/asm-i386/unaligned.h deleted file mode 100644 index 7acd795762..0000000000 --- a/xen/include/asm-i386/unaligned.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __I386_UNALIGNED_H -#define __I386_UNALIGNED_H - -/* - * The i386 can do unaligned accesses itself. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. - */ - -/** - * get_unaligned - get value from possibly mis-aligned location - * @ptr: pointer to value - * - * This macro should be used for accessing values larger in size than - * single bytes at locations that are expected to be improperly aligned, - * e.g. retrieving a u16 value from a location not u16-aligned. - * - * Note that unaligned accesses can be very expensive on some architectures. 
- */ -#define get_unaligned(ptr) (*(ptr)) - -/** - * put_unaligned - put value to a possibly mis-aligned location - * @val: value to place - * @ptr: pointer to location - * - * This macro should be used for placing values larger in size than - * single bytes at locations that are expected to be improperly aligned, - * e.g. writing a u16 value to a location not u16-aligned. - * - * Note that unaligned accesses can be very expensive on some architectures. - */ -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h new file mode 100644 index 0000000000..4d750d486f --- /dev/null +++ b/xen/include/asm-x86/acpi.h @@ -0,0 +1,170 @@ +/* + * asm-i386/acpi.h + * + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2001 Patrick Mochel + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#ifndef _ASM_ACPI_H +#define _ASM_ACPI_H + +#ifdef __KERNEL__ + +#define COMPILER_DEPENDENT_INT64 long long +#define COMPILER_DEPENDENT_UINT64 unsigned long long + +/* + * Calling conventions: + * + * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) + * ACPI_EXTERNAL_XFACE - External ACPI interfaces + * ACPI_INTERNAL_XFACE - Internal ACPI interfaces + * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces + */ +#define ACPI_SYSTEM_XFACE +#define ACPI_EXTERNAL_XFACE +#define ACPI_INTERNAL_XFACE +#define ACPI_INTERNAL_VAR_XFACE + +/* Asm macros */ + +#define ACPI_ASM_MACROS +#define BREAKPOINT3 +#define ACPI_DISABLE_IRQS() __cli() +#define ACPI_ENABLE_IRQS() __sti() +#define ACPI_FLUSH_CPU_CACHE() wbinvd() + +/* + * A brief explanation as GNU inline assembly is a bit hairy + * %0 is the output parameter in EAX ("=a") + * %1 and %2 are the input parameters in ECX ("c") + * and an immediate value ("i") respectively + * All actual register references are preceded with "%%" as in "%%edx" + * Immediate values in the assembly are preceded by "$" as in "$0x1" + * The final asm parameter are the operation altered non-output registers. 
+ */ +#define ACPI_ACQUIRE_GLOBAL_LOCK(GLptr, Acq) \ + do { \ + int dummy; \ + asm("1: movl (%1),%%eax;" \ + "movl %%eax,%%edx;" \ + "andl %2,%%edx;" \ + "btsl $0x1,%%edx;" \ + "adcl $0x0,%%edx;" \ + "lock; cmpxchgl %%edx,(%1);" \ + "jnz 1b;" \ + "cmpb $0x3,%%dl;" \ + "sbbl %%eax,%%eax" \ + :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~1L):"dx"); \ + } while(0) + +#define ACPI_RELEASE_GLOBAL_LOCK(GLptr, Acq) \ + do { \ + int dummy; \ + asm("1: movl (%1),%%eax;" \ + "movl %%eax,%%edx;" \ + "andl %2,%%edx;" \ + "lock; cmpxchgl %%edx,(%1);" \ + "jnz 1b;" \ + "andl $0x1,%%eax" \ + :"=a"(Acq),"=c"(dummy):"c"(GLptr),"i"(~3L):"dx"); \ + } while(0) + + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ + asm("divl %2;" \ + :"=a"(q32), "=d"(r32) \ + :"r"(d32), \ + "0"(n_lo), "1"(n_hi)) + + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ + asm("shrl $1,%2;" \ + "rcrl $1,%3;" \ + :"=r"(n_hi), "=r"(n_lo) \ + :"0"(n_hi), "1"(n_lo)) + + +#ifdef CONFIG_ACPI_BOOT +extern int acpi_lapic; +extern int acpi_ioapic; +extern int acpi_noirq; + +/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ +#define FIX_ACPI_PAGES 4 + +#else /* !CONFIG_ACPI_BOOT */ +# define acpi_lapic 0 +# define acpi_ioapic 0 + +#endif /* !CONFIG_ACPI_BOOT */ + +#ifdef CONFIG_ACPI_PCI +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +extern int acpi_irq_balance_set(char *str); +#else +static inline void acpi_noirq_set(void) { } +static inline int acpi_irq_balance_set(char *str) { return 0; } +#endif + +#ifdef CONFIG_ACPI_SLEEP + +extern unsigned long saved_eip; +extern unsigned long saved_esp; +extern unsigned long saved_ebp; +extern unsigned long saved_ebx; +extern unsigned long saved_esi; +extern unsigned long saved_edi; + +static inline void acpi_save_register_state(unsigned long return_point) +{ + saved_eip = return_point; + asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp)); + asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp)); + asm volatile 
("movl %%ebx,(%0)" : "=m" (saved_ebx)); + asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi)); + asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi)); +} + +#define acpi_restore_register_state() do {} while (0) + + +/* routines for saving/restoring kernel state */ +extern int acpi_save_state_mem(void); +extern int acpi_save_state_disk(void); +extern void acpi_restore_state_mem(void); + +extern unsigned long acpi_wakeup_address; + +extern void do_suspend_lowlevel_s4bios(int resume); + +/* early initialization routine */ +extern void acpi_reserve_bootmem(void); + +#endif /*CONFIG_ACPI_SLEEP*/ + + +#endif /*__KERNEL__*/ + +#endif /*_ASM_ACPI_H*/ diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h new file mode 100644 index 0000000000..589692a2fd --- /dev/null +++ b/xen/include/asm-x86/apic.h @@ -0,0 +1,101 @@ +#ifndef __ASM_APIC_H +#define __ASM_APIC_H + +#include +#include +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC + +#define APIC_DEBUG 0 + +#if APIC_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + +/* + * Basic functions accessing APICs. 
+ */ + +static __inline void apic_write(unsigned long reg, u32 v) +{ + *((volatile u32 *)(APIC_BASE+reg)) = v; +} + +static __inline void apic_write_atomic(unsigned long reg, u32 v) +{ + xchg((volatile u32 *)(APIC_BASE+reg), v); +} + +static __inline u32 apic_read(unsigned long reg) +{ + return *((volatile u32 *)(APIC_BASE+reg)); +} + +static __inline__ void apic_wait_icr_idle(void) +{ + do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); +} + +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +# define apic_read_around(x) +# define apic_write_around(x,y) apic_write((x),(y)) +#else +# define FORCE_READ_AROUND_WRITE 1 +# define apic_read_around(x) apic_read(x) +# define apic_write_around(x,y) apic_write_atomic((x),(y)) +#endif + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction: + * - a single rmw on Pentium/82489DX + * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write_around(APIC_EOI, 0); +} + +extern int get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC (void); +extern void disconnect_bsp_APIC (void); +extern void disable_local_APIC (void); +extern int verify_local_APIC (void); +extern void cache_APIC_registers (void); +extern void sync_Arb_IDs (void); +extern void init_bsp_APIC (void); +extern void setup_local_APIC (void); +extern void init_apic_mappings (void); +extern void smp_local_timer_interrupt (struct pt_regs * regs); +extern void setup_APIC_clocks (void); +extern void setup_apic_nmi_watchdog (void); +extern inline void nmi_watchdog_tick (struct pt_regs * regs); +extern int APIC_init_uniprocessor (void); +extern void disable_APIC_timer(void); +extern void enable_APIC_timer(void); + +/*extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);*/ +/*extern void apic_pm_unregister(struct pm_dev*);*/ + +extern unsigned int watchdog_on; + 
+extern unsigned int apic_timer_irqs [NR_CPUS]; +extern int check_nmi_watchdog (void); + +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* __ASM_APIC_H */ diff --git a/xen/include/asm-x86/apicdef.h b/xen/include/asm-x86/apicdef.h new file mode 100644 index 0000000000..9f07409b3f --- /dev/null +++ b/xen/include/asm-x86/apicdef.h @@ -0,0 +1,379 @@ +#ifndef __ASM_APICDEF_H +#define __ASM_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox , 1995. + * Ingo Molnar , 1999, 2000 + */ + +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +#define APIC_ID 0x20 +#define APIC_ID_MASK (0x0F<<24) +#define GET_APIC_ID(x) (((x)>>24)&0x0F) +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define GET_APIC_VERSION(x) ((x)&0xFF) +#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) +#define APIC_INTEGRATED(x) ((x)&0xF0) +#define APIC_XAPIC_SUPPORT(x) ((x)>=0x14) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFF +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFF +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFF<<24) +#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) +#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) +#define APIC_ALL_CPUS 0xFF +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */ +#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */ +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1<<9) +#define APIC_SPIV_APIC_ENABLED (1<<8) +#define APIC_ISR 0x100 +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define 
APIC_ESR_ILLREGA 0x00080 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) +#define SET_APIC_DEST_FIELD(x) ((x)<<24) +#define APIC_LVTT 0x320 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) +#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) +#define SET_APIC_TIMER_BASE(x) (((x)<<18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_PERIODIC (1<<17) +#define APIC_LVT_MASKED (1<<16) +#define APIC_LVT_LEVEL_TRIGGER (1<<15) +#define APIC_LVT_REMOTE_IRR (1<<14) +#define APIC_INPUT_POLARITY (1<<13) +#define APIC_SEND_PENDING (1<<12) +#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) +#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_TDR_DIV_TMBASE (1<<2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA + +#define 
APIC_BASE (fix_to_virt(FIX_APIC_BASE)) + +#ifdef CONFIG_X86_CLUSTERED_APIC +#define MAX_IO_APICS 32 +#else +#define MAX_IO_APICS 8 +#endif + + +/* + * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs + * don't broadcast (yet?), but if they did, they might use 0xFFFF. + */ +#define APIC_BROADCAST_ID_XAPIC (0xFF) +#define APIC_BROADCAST_ID_APIC (0x0F) + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +#define lapic ((volatile struct local_apic *)APIC_BASE) + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination 
Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + 
+/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { u32 __reserved[4]; } __reserved_15; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#endif diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h new file mode 100644 index 0000000000..b64adaedba --- /dev/null 
+++ b/xen/include/asm-x86/atomic.h @@ -0,0 +1,195 @@ +#ifndef __ARCH_X86_ATOMIC__ +#define __ARCH_X86_ATOMIC__ + +#include + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#ifdef CONFIG_SMP +#define LOCK "lock ; " +#else +#define LOCK "" +#endif + +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_set(v,i) (((v)->counter) = (i)) + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Note that the guaranteed useful range + * of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_add(int i, atomic_t *v) +{ + __asm__ __volatile__( + LOCK "addl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
+ */ +static __inline__ void atomic_sub(int i, atomic_t *v) +{ + __asm__ __volatile__( + LOCK "subl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "subl %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_dec(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "decl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
+ */ +static __inline__ int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "decl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "incl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "addl %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#endif /* __ARCH_X86_ATOMIC__ */ diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h new file mode 100644 index 0000000000..58ae424e54 --- /dev/null +++ b/xen/include/asm-x86/bitops.h @@ -0,0 +1,367 @@ +#ifndef _X86_BITOPS_H +#define _X86_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +#include + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. 
All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX "lock ; " +#else +#define LOCK_PREFIX "" +#endif + +#define ADDR (*(volatile long *) addr) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(long nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "bts"__OS" %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void __set_bit(long nr, volatile void * addr) +{ + __asm__( + "bts"__OS" %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. 
+ */ +static __inline__ void clear_bit(long nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btr"__OS" %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void __change_bit(long nr, volatile void * addr) +{ + __asm__ __volatile__( + "btc"__OS" %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void change_bit(long nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btc"__OS" %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_set_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "bts"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. 
+ */ +static __inline__ int __test_and_set_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__( + "bts"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btr"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int __test_and_clear_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__( + "btr"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static __inline__ int __test_and_change_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__ __volatile__( + "btc"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. 
+ */ +static __inline__ int test_and_change_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btc"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + + +static __inline__ int constant_test_bit(long nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(long nr, volatile void * addr) +{ + long oldbit; + + __asm__ __volatile__( + "bt"__OS" %2,%1\n\tsbb"__OS" %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"dIr" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +/** + * find_first_zero_bit - find the first zero bit in a memory region + * @addr: The address to start the search at + * @size: The maximum bitnumber to search + * + * Returns the bit-number of the first zero bit, not the number of the byte + * containing a bit. -1 when none found. 
+ */ +static __inline__ int find_first_zero_bit(void * addr, unsigned size) +{ + int d0, d1, d2; + int res; + + if (!size) + return 0; + __asm__ __volatile__( + "movl $-1,%%eax\n\t" + "xorl %%edx,%%edx\n\t" + "repe; scasl\n\t" + "je 1f\n\t" + "xorl -4(%%"__OP"di),%%eax\n\t" + "sub"__OS" $4,%%"__OP"di\n\t" + "bsfl %%eax,%%edx\n" + "1:\tsub"__OS" %%"__OP"bx,%%"__OP"di\n\t" + "shl"__OS" $3,%%"__OP"di\n\t" + "add"__OS" %%"__OP"di,%%"__OP"dx" + :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory"); + return res; +} + +/** + * find_next_zero_bit - find the first zero bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +static __inline__ int find_next_zero_bit (void * addr, int size, int offset) +{ + unsigned int * p = ((unsigned int *) addr) + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for zero in first byte + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (~(*p >> bit))); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No zero yet, search remaining full bytes for a zero + */ + res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr)); + return (offset + set + res); +} + +/** + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static __inline__ unsigned long ffz(unsigned long word) +{ + __asm__("bsf"__OS" %1,%0" + :"=r" (word) + :"r" (~word)); + return word; +} + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). 
+ */ +static __inline__ int ffs(int x) +{ + int r; + + __asm__("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r+1; +} + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +#define hweight32(x) generic_hweight32(x) +#define hweight16(x) generic_hweight16(x) +#define hweight8(x) generic_hweight8(x) + +#define ext2_set_bit __test_and_set_bit +#define ext2_clear_bit __test_and_clear_bit +#define ext2_test_bit test_bit +#define ext2_find_first_zero_bit find_first_zero_bit +#define ext2_find_next_zero_bit find_next_zero_bit + +/* Bitmap functions for the minix filesystem. */ +#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) +#define minix_set_bit(nr,addr) __set_bit(nr,addr) +#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) +#define minix_test_bit(nr,addr) test_bit(nr,addr) +#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) + +#endif /* _X86_BITOPS_H */ diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h new file mode 100644 index 0000000000..2539a6f240 --- /dev/null +++ b/xen/include/asm-x86/cache.h @@ -0,0 +1,13 @@ +/* + * include/asm-x86/cache.h + */ +#ifndef __ARCH_X86_CACHE_H +#define __ARCH_X86_CACHE_H + +#include + +/* L1 cache line size */ +#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h new file mode 100644 index 0000000000..309cfed5e0 --- /dev/null +++ b/xen/include/asm-x86/config.h @@ -0,0 +1,150 @@ +/****************************************************************************** + * config.h + * + * A Linux-style configuration list. 
+ */ + +#ifndef __XEN_I386_CONFIG_H__ +#define __XEN_I386_CONFIG_H__ + +#define CONFIG_X86 1 + +#define CONFIG_SMP 1 +#define CONFIG_X86_LOCAL_APIC 1 +#define CONFIG_X86_IO_APIC 1 +#define CONFIG_X86_L1_CACHE_SHIFT 5 + +#define CONFIG_ACPI 1 +#define CONFIG_ACPI_BOOT 1 + +#define CONFIG_PCI 1 +#define CONFIG_PCI_BIOS 1 +#define CONFIG_PCI_DIRECT 1 + +#define CONFIG_IDE 1 +#define CONFIG_BLK_DEV_IDE 1 +#define CONFIG_BLK_DEV_IDEDMA 1 +#define CONFIG_BLK_DEV_IDEPCI 1 +#define CONFIG_IDEDISK_MULTI_MODE 1 +#define CONFIG_IDEDISK_STROKE 1 +#define CONFIG_IDEPCI_SHARE_IRQ 1 +#define CONFIG_BLK_DEV_IDEDMA_PCI 1 +#define CONFIG_IDEDMA_PCI_AUTO 1 +#define CONFIG_IDEDMA_AUTO 1 +#define CONFIG_IDEDMA_ONLYDISK 1 +#define CONFIG_BLK_DEV_IDE_MODES 1 +#define CONFIG_BLK_DEV_PIIX 1 + +#define CONFIG_SCSI 1 +#define CONFIG_SCSI_LOGGING 1 +#define CONFIG_BLK_DEV_SD 1 +#define CONFIG_SD_EXTRA_DEVS 40 +#define CONFIG_SCSI_MULTI_LUN 1 + +#define CONFIG_XEN_ATTENTION_KEY 1 + + +#define HZ 100 + +/* + * Just to keep compiler happy. + * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!! + * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-) + * Mmmm... so niiiiiice.... + */ +#define SMP_CACHE_BYTES 64 +#define NR_CPUS 16 +#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define ____cacheline_aligned __cacheline_aligned + +/*** Hypervisor owns top 64MB of virtual address space. ***/ +#define HYPERVISOR_VIRT_START (0xFC000000UL) + +/* + * First 4MB are mapped read-only for all. It's for the machine->physical + * mapping table (MPT table). The following are virtual addresses. + */ +#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START) +#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (4*1024*1024)) +/* + * Next 12MB is fixed monitor space, which is part of a 40MB direct-mapped + * memory region. The following are machine addresses. 
+ */ +#define MAX_MONITOR_ADDRESS (12*1024*1024) +#define MAX_DIRECTMAP_ADDRESS (40*1024*1024) +/* And the virtual addresses for the direct-map region... */ +#define DIRECTMAP_VIRT_START (READONLY_MPT_VIRT_END) +#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS) +#define MONITOR_VIRT_START (DIRECTMAP_VIRT_START) +#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS) +#define RDWR_MPT_VIRT_START (MONITOR_VIRT_END) +#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024)) +#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END) +#define FRAMETABLE_VIRT_END (DIRECTMAP_VIRT_END) +/* Next 4MB of virtual address space is used as a linear p.t. mapping. */ +#define LINEAR_PT_VIRT_START (DIRECTMAP_VIRT_END) +#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (4*1024*1024)) +/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */ +#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END) +#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + (4*1024*1024)) +/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */ +#define PERDOMAIN_VIRT_START (SH_LINEAR_PT_VIRT_END) +#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024)) +#define GDT_VIRT_START (PERDOMAIN_VIRT_START) +#define GDT_VIRT_END (GDT_VIRT_START + (64*1024)) +#define LDT_VIRT_START (GDT_VIRT_END) +#define LDT_VIRT_END (LDT_VIRT_START + (64*1024)) +/* Penultimate 4MB of virtual address space used for domain page mappings. */ +#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END) +#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024)) +/* Final 4MB of virtual address space used for ioremap(). */ +#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END) +#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024)) + +/* + * Amount of slack domain memory to leave in system, in kilobytes. + * Prevents a hard out-of-memory crunch for things like network receive. 
+ */ +#define SLACK_DOMAIN_MEM_KILOBYTES 2048 + +/* Linkage for x86 */ +#define FASTCALL(x) x __attribute__((regparm(3))) +#define asmlinkage __attribute__((regparm(0))) +#define __ALIGN .align 16,0x90 +#define __ALIGN_STR ".align 16,0x90" +#define SYMBOL_NAME_STR(X) #X +#define SYMBOL_NAME(X) X +#define SYMBOL_NAME_LABEL(X) X##: +#ifdef __ASSEMBLY__ +#define ALIGN __ALIGN +#define ALIGN_STR __ALIGN_STR +#define ENTRY(name) \ + .globl SYMBOL_NAME(name); \ + ALIGN; \ + SYMBOL_NAME_LABEL(name) +#endif + +#define PGT_base_page_table PGT_l2_page_table + +#define barrier() __asm__ __volatile__("": : :"memory") + +#define __HYPERVISOR_CS 0x0808 +#define __HYPERVISOR_DS 0x0810 + +#define NR_syscalls 256 + +#ifndef NDEBUG +#define MEMORY_GUARD +#endif + +#ifndef __ASSEMBLY__ +extern unsigned long _end; /* standard ELF symbol */ +extern void __out_of_line_bug(int line) __attribute__((noreturn)); +#define out_of_line_bug() __out_of_line_bug(__LINE__) +#endif /* __ASSEMBLY__ */ + +/* For generic assembly code: use macros to define operation/operand sizes. 
*/ +#define __OS "l" /* Operation Suffix */ +#define __OP "e" /* Operand Prefix */ + +#endif /* __XEN_I386_CONFIG_H__ */ diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h new file mode 100644 index 0000000000..8b2e913bff --- /dev/null +++ b/xen/include/asm-x86/cpufeature.h @@ -0,0 +1,104 @@ +/* + * cpufeature.h + * + * Defines x86 CPU feature bits + */ + +#ifndef __ASM_X86_CPUFEATURE_H +#define __ASM_X86_CPUFEATURE_H + +/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ +#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) + +#define NCAPINTS 6 /* Currently we have 6 32-bit words worth of info */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ +#define 
X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ + /* of FPU context), and CR4.OSFXSR available */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ +#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! 
*/ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ +#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ + +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ +#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ + + +#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) +#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) + +#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) +#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) +#define cpu_has_de boot_cpu_has(X86_FEATURE_DE) +#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) +#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) +#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) +#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) +#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_XMM2) +#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) +#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) +#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR) +#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX) +#define cpu_has_fxsr 
boot_cpu_has(X86_FEATURE_FXSR) +#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) +#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) +#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) +#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) +#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) +#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) +#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) + +#endif /* __ASM_X86_CPUFEATURE_H */ diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h new file mode 100644 index 0000000000..fdbc373b92 --- /dev/null +++ b/xen/include/asm-x86/current.h @@ -0,0 +1,52 @@ +#ifndef _X86_CURRENT_H +#define _X86_CURRENT_H + +struct task_struct; + +#define STACK_RESERVED \ + (sizeof(execution_context_t) + sizeof(struct task_struct *)) + +static inline struct task_struct * get_current(void) +{ + struct task_struct *current; + __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0" + : "=r" (current) : "0" (STACK_SIZE-4) ); + return current; +} + +#define current get_current() + +static inline void set_current(struct task_struct *p) +{ + __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)" + : : "r" (STACK_SIZE-4), "r" (p) ); +} + +static inline execution_context_t *get_execution_context(void) +{ + execution_context_t *execution_context; + __asm__ ( "andl %%esp,%0; addl %2,%0" + : "=r" (execution_context) + : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); + return execution_context; +} + +static inline unsigned long get_stack_top(void) +{ + unsigned long p; + __asm__ ( "orl %%esp,%0; andl $~3,%0" + : "=r" (p) : "0" (STACK_SIZE-4) ); + return p; +} + +#define schedule_tail(_p) \ + __asm__ __volatile__ ( \ + "andl %%esp,%0; addl %2,%0; movl %0,%%esp; jmp *%1" \ + : : "r" (~(STACK_SIZE-1)), \ + "r" (unlikely(is_idle_task((_p))) ? 
\ + continue_cpu_idle_loop : \ + continue_nonidle_task), \ + "i" (STACK_SIZE-STACK_RESERVED) ) + + +#endif /* _X86_CURRENT_H */ diff --git a/xen/include/asm-x86/debugreg.h b/xen/include/asm-x86/debugreg.h new file mode 100644 index 0000000000..7ca7deced7 --- /dev/null +++ b/xen/include/asm-x86/debugreg.h @@ -0,0 +1,64 @@ +#ifndef _X86_DEBUGREG_H +#define _X86_DEBUGREG_H + + +/* Indicate the register numbers for a number of the specific + debug registers. Registers 0-3 contain the addresses we wish to trap on */ +#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ +#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ + +#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ +#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ + +/* Define a few things for the status register. We can use this to determine + which debugging register was responsible for the trap. The other bits + are either reserved or not of interest to us. */ + +#define DR_TRAP0 (0x1) /* db0 */ +#define DR_TRAP1 (0x2) /* db1 */ +#define DR_TRAP2 (0x4) /* db2 */ +#define DR_TRAP3 (0x8) /* db3 */ + +#define DR_STEP (0x4000) /* single-step */ +#define DR_SWITCH (0x8000) /* task switch */ + +/* Now define a bunch of things for manipulating the control register. + The top two bytes of the control register consist of 4 fields of 4 + bits - each field corresponds to one of the four debug registers, + and indicates what types of access we trap on, and how large the data + field is that we are looking at */ + +#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ +#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ + +#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ +#define DR_RW_WRITE (0x1) +#define DR_RW_READ (0x3) + +#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ +#define DR_LEN_2 (0x4) +#define DR_LEN_4 (0xC) + +/* The low byte to the control register determine which registers are + enabled. There are 4 fields of two bits. 
One bit is "local", meaning + that the processor will reset the bit after a task switch and the other + is global meaning that we have to explicitly reset the bit. With linux, + you can use either one, since we explicitly zero the register when we enter + kernel mode. */ + +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ + +#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ +#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ + +/* The second byte to the control register has a few special things. + We can slow the instruction pipeline for instructions coming via the + gdt or the ldt if we want to. I am not sure why this is an advantage */ + +#define DR_CONTROL_RESERVED (~0xFFFF03FFUL) /* Reserved by Intel */ +#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ +#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ + +#endif /* _X86_DEBUGREG_H */ diff --git a/xen/include/asm-x86/delay.h b/xen/include/asm-x86/delay.h new file mode 100644 index 0000000000..b294c41918 --- /dev/null +++ b/xen/include/asm-x86/delay.h @@ -0,0 +1,14 @@ +#ifndef _X86_DELAY_H +#define _X86_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/i386/lib/delay.c + */ + +extern unsigned long ticks_per_usec; +extern void __udelay(unsigned long usecs); +#define udelay(n) __udelay(n) + +#endif /* defined(_X86_DELAY_H) */ diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h new file mode 100644 index 0000000000..780f9c8728 --- /dev/null +++ b/xen/include/asm-x86/desc.h @@ -0,0 +1,58 @@ +#ifndef __ARCH_DESC_H +#define __ARCH_DESC_H + +#define LDT_ENTRY_SIZE 8 + +#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY + +#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) +#define __FIRST_LDT_ENTRY 
(__FIRST_TSS_ENTRY + 1) + +#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) +#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) + +#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) ) + +/* + * Guest OS must provide its own code selectors, or use the one we provide. The + * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector + * value is okay. Note that checking only the RPL is insufficient: if the + * selector is poked into an interrupt, trap or call gate then the RPL is + * ignored when the gate is accessed. + */ +#define VALID_SEL(_s) \ + (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \ + (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ + ((_s)&4)) && \ + (((_s)&3) == 1)) +#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s)) + +/* These are bitmasks for the first 32 bits of a descriptor table entry. */ +#define _SEGMENT_TYPE (15<< 8) +#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */ +#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */ +#define _SEGMENT_P ( 1<<15) /* Segment Present */ +#define _SEGMENT_G ( 1<<23) /* Granularity */ + +#ifndef __ASSEMBLY__ +struct desc_struct { + unsigned long a,b; +}; + +extern struct desc_struct gdt_table[]; +extern struct desc_struct *idt, *gdt; + +struct Xgt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); +}; + +#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) +#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) + +extern void set_intr_gate(unsigned int irq, void * addr); +extern void set_tss_desc(unsigned int n, void *addr); + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h new file mode 100644 index 0000000000..ef915df700 --- /dev/null +++ b/xen/include/asm-x86/div64.h @@ -0,0 +1,17 @@ +#ifndef __I386_DIV64 +#define __I386_DIV64 + +#define do_div(n,base) ({ \ + unsigned long __upper, __low, __high, __mod; \ + asm("":"=a" 
(__low), "=d" (__high):"A" (n)); \ + __upper = __high; \ + if (__high) { \ + __upper = __high % (base); \ + __high = __high / (base); \ + } \ + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \ + asm("":"=A" (n):"a" (__low),"d" (__high)); \ + __mod; \ +}) + +#endif diff --git a/xen/include/asm-x86/domain_page.h b/xen/include/asm-x86/domain_page.h new file mode 100644 index 0000000000..d8cdf0b74e --- /dev/null +++ b/xen/include/asm-x86/domain_page.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * domain_page.h + * + * Allow temporary mapping of domain page frames into Xen space. + */ + +#ifndef __ASM_DOMAIN_PAGE_H__ +#define __ASM_DOMAIN_PAGE_H__ + +#include +#include + +extern unsigned long *mapcache; +#define MAPCACHE_ENTRIES 1024 + +/* + * Maps a given physical address, returning corresponding virtual address. + * The entire page containing that VA is now accessible until a + * corresponding call to unmap_domain_mem(). + */ +extern void *map_domain_mem(unsigned long pa); + +/* + * Pass a VA within a page previously mapped with map_domain_mem(). + * That page will then be removed from the mapping lists. + */ +extern void unmap_domain_mem(void *va); + +#endif /* __ASM_DOMAIN_PAGE_H__ */ diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h new file mode 100644 index 0000000000..fcfa97aee9 --- /dev/null +++ b/xen/include/asm-x86/fixmap.h @@ -0,0 +1,112 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include +#include +#include +#include + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +/* + * on UP currently we will have no trace of the fixmap mechanizm, + * no page table allocations, etc. This might change in the + * future, say framebuffers for the console driver(s) could be + * fix-mapped? + */ +enum fixed_addresses { +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +#endif +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif +#ifdef CONFIG_ACPI_BOOT + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif + __end_of_fixed_addresses +}; + +extern void __set_fixmap (enum fixed_addresses idx, + l1_pgentry_t entry); + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR)) +/* + * Some hardware wants to get fixmapped without caching. 
+ */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE)) +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ +#define FIXADDR_TOP (0xffffe000UL) +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +#endif diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h new file mode 100644 index 0000000000..5cc60f4368 --- /dev/null +++ b/xen/include/asm-x86/flushtlb.h @@ -0,0 +1,86 @@ +/****************************************************************************** + * flushtlb.h + * + * TLB flushes are timestamped using a global virtual 'clock' which ticks + * on any TLB flush on any processor. + * + * Copyright (c) 2003, K A Fraser + */ + +#ifndef __FLUSHTLB_H__ +#define __FLUSHTLB_H__ + +#include +#include + +/* + * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed. 
+ * Therefore, if the current TLB time and a previously-read timestamp differ + * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock + * has wrapped at least once and every CPU's TLB is guaranteed to have been + * flushed meanwhile. + * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock. + */ +#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1) + +/* + * 'cpu_stamp' is the current timestamp for the CPU we are testing. + * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last + * used for a purpose that may have caused the CPU's TLB to become tainted. + */ +static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp) +{ + /* + * Why does this work? + * 1. XOR sets high-order bits determines if stamps from differing epochs. + * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'. + * In either case a flush is unnecessary: we therefore OR the results from + * (1) and (2), mask the high-order bits, and return the inverse. + */ + return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) & + ~TLBCLOCK_EPOCH_MASK); +} + +extern u32 tlbflush_clock; +extern u32 tlbflush_time[NR_CPUS]; + +extern void tlb_clocktick(void); +extern void new_tlbflush_clock_period(void); + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. 
+ */ + +#ifndef CONFIG_SMP + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb() +#define flush_tlb_all_pge() __flush_tlb_pge() +#define local_flush_tlb() __flush_tlb() +#define flush_tlb_cpu(_cpu) __flush_tlb() +#define flush_tlb_mask(_mask) __flush_tlb() +#define try_flush_tlb_mask(_mask) __flush_tlb() + +#else + +#include + +extern int try_flush_tlb_mask(unsigned long mask); +extern void flush_tlb_mask(unsigned long mask); +extern void flush_tlb_all_pge(void); + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1) +#define local_flush_tlb() __flush_tlb() +#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu)) + +#endif + +#endif /* __FLUSHTLB_H__ */ diff --git a/xen/include/asm-x86/hardirq.h b/xen/include/asm-x86/hardirq.h new file mode 100644 index 0000000000..5b3cb77c91 --- /dev/null +++ b/xen/include/asm-x86/hardirq.h @@ -0,0 +1,92 @@ +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +#include +#include + +/* assembly code in softirq.h is sensitive to the offsets of these fields */ +typedef struct { + unsigned int __softirq_pending; + unsigned int __local_irq_count; + unsigned int __local_bh_count; + unsigned int __syscall_count; + unsigned int __nmi_count; + unsigned long idle_timestamp; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +/* + * Are we in an interrupt context? Either doing bottom half + * or hardware interrupt processing? 
+ */ +#define in_interrupt() ({ int __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) + +#define in_irq() (local_irq_count(smp_processor_id()) != 0) + +#ifndef CONFIG_SMP + +#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) +#define hardirq_endlock(cpu) do { } while (0) + +#define irq_enter(cpu, irq) (local_irq_count(cpu)++) +#define irq_exit(cpu, irq) (local_irq_count(cpu)--) + +#define synchronize_irq() barrier() + +#else + +#include +#include + +extern unsigned char global_irq_holder; +extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */ + +static inline int irqs_running (void) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + if (local_irq_count(i)) + return 1; + return 0; +} + +static inline void release_irqlock(int cpu) +{ + /* if we didn't own the irq lock, just ignore.. */ + if (global_irq_holder == (unsigned char) cpu) { + global_irq_holder = NO_PROC_ID; + clear_bit(0,&global_irq_lock); + } +} + +static inline void irq_enter(int cpu, int irq) +{ + ++local_irq_count(cpu); + + smp_mb(); + + while (test_bit(0,&global_irq_lock)) { + cpu_relax(); + } +} + +static inline void irq_exit(int cpu, int irq) +{ + --local_irq_count(cpu); +} + +static inline int hardirq_trylock(int cpu) +{ + return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock); +} + +#define hardirq_endlock(cpu) do { } while (0) + +extern void synchronize_irq(void); + +#endif /* CONFIG_SMP */ + +#endif /* __ASM_HARDIRQ_H */ diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h new file mode 100644 index 0000000000..95a6bb6cde --- /dev/null +++ b/xen/include/asm-x86/i387.h @@ -0,0 +1,38 @@ +/* + * include/asm-i386/i387.h + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#ifndef __ASM_I386_I387_H +#define __ASM_I386_I387_H + +#include +#include + +extern void init_fpu(void); +extern void save_init_fpu( 
struct task_struct *tsk ); +extern void restore_fpu( struct task_struct *tsk ); + +#define unlazy_fpu( tsk ) do { \ + if ( test_bit(PF_USEDFPU, &tsk->flags) ) \ + save_init_fpu( tsk ); \ +} while (0) + +#define clear_fpu( tsk ) do { \ + if ( test_and_clear_bit(PF_USEDFPU, &tsk->flags) ) { \ + asm volatile("fwait"); \ + stts(); \ + } \ +} while (0) + +#define load_mxcsr( val ) do { \ + unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \ + asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ +} while (0) + +#endif /* __ASM_I386_I387_H */ diff --git a/xen/include/asm-x86/io.h b/xen/include/asm-x86/io.h new file mode 100644 index 0000000000..c88648aa76 --- /dev/null +++ b/xen/include/asm-x86/io.h @@ -0,0 +1,99 @@ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#include +#include + +#define IO_SPACE_LIMIT 0xffff + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct pfn_info" to physical address. 
+ */ +#ifdef CONFIG_HIGHMEM64G +#define page_to_phys(page) ((u64)(page - frame_table) << PAGE_SHIFT) +#else +#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT) +#endif + +#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table)) +#define page_to_virt(_page) phys_to_virt(page_to_phys(_page)) + + +/* + * IO bus memory addresses are also 1:1 with the physical address + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt +#define page_to_bus page_to_phys + +#define __OUT1(s,x) \ +static inline void out##s(unsigned x value, unsigned short port) { + +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" + +#define __OUT(s,s1,x) \ +__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port));} + +#define __IN1(s) \ +static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; + +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" + +#define __IN(s,s1,i...) \ +__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } + +#define RETURN_TYPE unsigned char +__IN(b,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l,"") +#undef RETURN_TYPE + +__OUT(b,"b",char) +__OUT(w,"w",short) +__OUT(l,,int) + +#endif diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h new file mode 100644 index 0000000000..8b94875891 --- /dev/null +++ b/xen/include/asm-x86/io_apic.h @@ -0,0 +1,167 @@ +#ifndef __ASM_IO_APIC_H +#define __ASM_IO_APIC_H + +#include +#include + +/* + * Intel IO-APIC support for SMP and UP systems. 
+ * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +#ifdef CONFIG_X86_IO_APIC + +#define APIC_MISMATCH_DEBUG + +#define IO_APIC_BASE(idx) \ + ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) + +/* + * The structure of the IO-APIC: + */ +struct IO_APIC_reg_00 { + __u32 __reserved_2 : 14, + LTS : 1, + delivery_type : 1, + __reserved_1 : 8, + ID : 4, + __reserved_0 : 4; +} __attribute__ ((packed)); + +struct IO_APIC_reg_01 { + __u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; +} __attribute__ ((packed)); + +struct IO_APIC_reg_02 { + __u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; +} __attribute__ ((packed)); + +struct IO_APIC_reg_03 { + __u32 boot_DT : 1, + __reserved_1 : 31; +} __attribute__ ((packed)); + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; +extern int nr_ioapic_registers[MAX_IO_APICS]; + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +struct IO_APIC_route_entry { + __u32 vector : 8, + delivery_mode : 3, /* 000: FIXED + * 001: lowest prio + * 111: ExtINT + */ + dest_mode : 1, /* 0: physical, 1: logical */ + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, /* 0: edge, 1: level */ + mask : 1, /* 0: enabled, 1: disabled */ + __reserved_2 : 15; + + union { struct { __u32 + __reserved_1 : 24, + physical_dest : 4, + __reserved_2 : 4; + } physical; + + struct { __u32 + __reserved_1 : 24, + logical_dest : 8; + } logical; + } dest; + +} __attribute__ ((packed)); + +/* + * MP-BIOS irq configuration table structures: + */ + +/* I/O APIC entries */ +extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mpc_config_intsrc *mp_irqs; + +/* 
non-0 if default (table-less) MP configuration */ +extern int mpc_default_type; + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + *IO_APIC_BASE(apic) = reg; + return *(IO_APIC_BASE(apic)+4); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = value; +} + +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + (void) *(IO_APIC_BASE(apic)+4); +} + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup) + +#ifdef CONFIG_ACPI_BOOT +extern int io_apic_get_unique_id (int ioapic, int apic_id); +extern int io_apic_get_version (int ioapic); +extern int io_apic_get_redir_entries (int ioapic); +extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low); +#endif + +extern int skip_ioapic_setup; /* 1 for "noapic" */ + +static inline void disable_ioapic_setup(void) +{ + skip_ioapic_setup = 1; +} + +static inline int ioapic_setup_disabled(void) +{ + return skip_ioapic_setup; +} + +#else /* !CONFIG_X86_IO_APIC */ +#define io_apic_assign_pci_irqs 0 + +static inline void disable_ioapic_setup(void) +{ } + +#endif /* !CONFIG_X86_IO_APIC */ + +#endif diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h new file mode 100644 index 0000000000..2c7c67a0da --- /dev/null +++ b/xen/include/asm-x86/irq.h @@ -0,0 +1,204 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */ + +#include +#include + +#define SA_INTERRUPT 0x20000000 +#define SA_SHIRQ 0x04000000 +#define SA_NOPROFILE 0x02000000 + +#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */ + +#define TIMER_IRQ 0 + +extern void disable_irq(unsigned int); +extern void 
disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +/* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +#define FIRST_EXTERNAL_VECTOR 0x30 + +#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR) + +#define HYPERVISOR_CALL_VECTOR 0x82 + +/* + * Vectors 0x30-0x3f are used for ISA interrupts. + */ + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + * + * Vectors 0xf0-0xfa are free (reserved for future Linux use). + */ +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define INVALIDATE_TLB_VECTOR 0xfd +#define EVENT_CHECK_VECTOR 0xfc +#define CALL_FUNCTION_VECTOR 0xfb +#define KDB_VECTOR 0xfa + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* + * First APIC vector available to drivers: (vectors 0x40-0xee) + * we start at 0x41 to spread out vectors evenly between priority + * levels. 
(0x82 is the syscall vector) + */ +#define FIRST_DEVICE_VECTOR 0x41 +#define FIRST_SYSTEM_VECTOR 0xef + +extern int irq_vector[NR_IRQS]; +#define IO_APIC_VECTOR(irq) irq_vector[irq] + +/* + * Various low-level irq details needed by irq.c, process.c, + * time.c, io_apic.c and smp.c + * + * Interrupt entry/exit code at both C and assembly level + */ + +extern void mask_irq(unsigned int irq); +extern void unmask_irq(unsigned int irq); +extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); +extern int i8259A_irq_pending(unsigned int irq); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); +extern void FASTCALL(send_IPI_self(int vector)); +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); +extern void send_IPI(int dest, int vector); + +extern unsigned long io_apic_irqs; + +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +extern char _stext, _etext; + +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) + +#define __STR(x) #x +#define STR(x) __STR(x) + +#define SAVE_ALL \ + "cld\n\t" \ + "pushl %gs\n\t" \ + "pushl %fs\n\t" \ + "pushl %es\n\t" \ + "pushl %ds\n\t" \ + "pushl %eax\n\t" \ + "pushl %ebp\n\t" \ + "pushl %edi\n\t" \ + "pushl %esi\n\t" \ + "pushl %edx\n\t" \ + "pushl %ecx\n\t" \ + "pushl %ebx\n\t" \ + "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \ + "movl %edx,%ds\n\t" \ + "movl %edx,%es\n\t" \ + "movl %edx,%fs\n\t" \ + "movl %edx,%gs\n\t" + +#define IRQ_NAME2(nr) nr##_interrupt(void) +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +/* + * SMP has a few special interrupts for IPI messages + */ + + /* there is a second layer of macro just to get the symbolic + name for the vector evaluated. 
This change is for RTLinux */ +#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) +#define XBUILD_SMP_INTERRUPT(x,v)\ +asmlinkage void x(void); \ +asmlinkage void call_##x(void); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(x) ":\n\t" \ + "pushl $"#v"-256\n\t" \ + SAVE_ALL \ + SYMBOL_NAME_STR(call_##x)":\n\t" \ + "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ + "jmp ret_from_intr\n"); + +#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) +#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ +asmlinkage void x(struct pt_regs * regs); \ +asmlinkage void call_##x(void); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(x) ":\n\t" \ + "pushl $"#v"-256\n\t" \ + SAVE_ALL \ + "movl %esp,%eax\n\t" \ + "pushl %eax\n\t" \ + SYMBOL_NAME_STR(call_##x)":\n\t" \ + "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ + "addl $4,%esp\n\t" \ + "jmp ret_from_intr\n"); + +#define BUILD_COMMON_IRQ() \ +asmlinkage void call_do_IRQ(void); \ +__asm__( \ + "\n" __ALIGN_STR"\n" \ + "common_interrupt:\n\t" \ + SAVE_ALL \ + SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \ + "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \ + "jmp ret_from_intr\n"); + +/* + * subtle. orig_eax is used by the signal code to distinct between + * system calls and interrupted 'random user-space'. Thus we have + * to put a negative value into orig_eax here. (the problem is that + * both system calls and IRQs want to have small integer numbers in + * orig_eax, and the syscall code has won the optimization conflict ;) + * + * Subtle as a pigs ear. 
VY + */ + +#define BUILD_IRQ(nr) \ +asmlinkage void IRQ_NAME(nr); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ + "pushl $"#nr"-256\n\t" \ + "jmp common_interrupt"); + +extern unsigned long prof_cpu_mask; +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; + +#include + +#if defined(CONFIG_X86_IO_APIC) +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { + if (IO_APIC_IRQ(i)) + send_IPI_self(IO_APIC_VECTOR(i)); +} +#else +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} +#endif + +#endif /* _ASM_HW_IRQ_H */ diff --git a/xen/include/asm-x86/ldt.h b/xen/include/asm-x86/ldt.h new file mode 100644 index 0000000000..4da2a15afc --- /dev/null +++ b/xen/include/asm-x86/ldt.h @@ -0,0 +1,29 @@ +#ifndef __ARCH_LDT_H +#define __ARCH_LDT_H + +#ifndef __ASSEMBLY__ + +static inline void load_LDT(struct task_struct *p) +{ + unsigned int cpu; + struct desc_struct *desc; + unsigned long ents; + + if ( (ents = p->mm.ldt_ents) == 0 ) + { + __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) ); + } + else + { + cpu = smp_processor_id(); + desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu); + desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1); + desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 | + ((LDT_VIRT_START&0xff0000)>>16); + __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) ); + } +} + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/xen/include/asm-x86/mc146818rtc.h b/xen/include/asm-x86/mc146818rtc.h new file mode 100644 index 0000000000..8758528f7c --- /dev/null +++ b/xen/include/asm-x86/mc146818rtc.h @@ -0,0 +1,113 @@ +/* + * Machine dependent access functions for RTC registers. 
+ */ +#ifndef _ASM_MC146818RTC_H +#define _ASM_MC146818RTC_H + +#include +#include + +extern spinlock_t rtc_lock; /* serialize CMOS RAM access */ + +/********************************************************************** + * register summary + **********************************************************************/ +#define RTC_SECONDS 0 +#define RTC_SECONDS_ALARM 1 +#define RTC_MINUTES 2 +#define RTC_MINUTES_ALARM 3 +#define RTC_HOURS 4 +#define RTC_HOURS_ALARM 5 +/* RTC_*_alarm is always true if 2 MSBs are set */ +# define RTC_ALARM_DONT_CARE 0xC0 + +#define RTC_DAY_OF_WEEK 6 +#define RTC_DAY_OF_MONTH 7 +#define RTC_MONTH 8 +#define RTC_YEAR 9 + +/* control registers - Moto names + */ +#define RTC_REG_A 10 +#define RTC_REG_B 11 +#define RTC_REG_C 12 +#define RTC_REG_D 13 + +/********************************************************************** + * register details + **********************************************************************/ +#define RTC_FREQ_SELECT RTC_REG_A + +/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus, + * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete, + * totalling to a max high interval of 2.228 ms. + */ +# define RTC_UIP 0x80 +# define RTC_DIV_CTL 0x70 + /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */ +# define RTC_REF_CLCK_4MHZ 0x00 +# define RTC_REF_CLCK_1MHZ 0x10 +# define RTC_REF_CLCK_32KHZ 0x20 + /* 2 values for divider stage reset, others for "testing purposes only" */ +# define RTC_DIV_RESET1 0x60 +# define RTC_DIV_RESET2 0x70 + /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 
15=2Hz */ +# define RTC_RATE_SELECT 0x0F + +/**********************************************************************/ +#define RTC_CONTROL RTC_REG_B +# define RTC_SET 0x80 /* disable updates for clock setting */ +# define RTC_PIE 0x40 /* periodic interrupt enable */ +# define RTC_AIE 0x20 /* alarm interrupt enable */ +# define RTC_UIE 0x10 /* update-finished interrupt enable */ +# define RTC_SQWE 0x08 /* enable square-wave output */ +# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ +# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ +# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ + +/**********************************************************************/ +#define RTC_INTR_FLAGS RTC_REG_C +/* caution - cleared by read */ +# define RTC_IRQF 0x80 /* any of the following 3 is active */ +# define RTC_PF 0x40 +# define RTC_AF 0x20 +# define RTC_UF 0x10 + +/**********************************************************************/ +#define RTC_VALID RTC_REG_D +# define RTC_VRT 0x80 /* valid RAM and time */ +/**********************************************************************/ + +/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) + * determines if the following two #defines are needed + */ +#ifndef BCD_TO_BIN +#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) +#endif + +#ifndef BIN_TO_BCD +#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) +#endif + + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... 
+ */ +#define CMOS_READ(addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +inb_p(RTC_PORT(1)); \ +}) +#define CMOS_WRITE(val, addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +outb_p((val),RTC_PORT(1)); \ +}) + +#define RTC_IRQ 8 + +#endif /* _ASM_MC146818RTC_H */ diff --git a/xen/include/asm-x86/mpspec.h b/xen/include/asm-x86/mpspec.h new file mode 100644 index 0000000000..1e73671c25 --- /dev/null +++ b/xen/include/asm-x86/mpspec.h @@ -0,0 +1,242 @@ +#ifndef __ASM_MPSPEC_H +#define __ASM_MPSPEC_H + +#include +#include + +/* + * Structure definitions for SMP machines following the + * Intel Multiprocessing Specification 1.1 and 1.4. + */ + +/* + * This tag identifies where the SMP configuration + * information is. + */ + +#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') + +/* + * a maximum of 16 APICs with the current APIC ID architecture. + * xAPICs can have up to 256. SAPICs have 16 ID bits. + */ +#ifdef CONFIG_X86_CLUSTERED_APIC +#define MAX_APICS 256 +#else +#define MAX_APICS 16 +#endif + +#define MAX_MPC_ENTRY 1024 + +struct intel_mp_floating +{ + char mpf_signature[4]; /* "_MP_" */ + unsigned int mpf_physptr; /* Configuration table address */ + unsigned char mpf_length; /* Our length (paragraphs) */ + unsigned char mpf_specification;/* Specification version */ + unsigned char mpf_checksum; /* Checksum (makes sum 0) */ + unsigned char mpf_feature1; /* Standard or configuration ? 
*/ + unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ + unsigned char mpf_feature3; /* Unused (0) */ + unsigned char mpf_feature4; /* Unused (0) */ + unsigned char mpf_feature5; /* Unused (0) */ +}; + +struct mp_config_table +{ + char mpc_signature[4]; +#define MPC_SIGNATURE "PCMP" + unsigned short mpc_length; /* Size of table */ + char mpc_spec; /* 0x01 */ + char mpc_checksum; + char mpc_oem[8]; + char mpc_productid[12]; + unsigned int mpc_oemptr; /* 0 if not present */ + unsigned short mpc_oemsize; /* 0 if not present */ + unsigned short mpc_oemcount; + unsigned int mpc_lapic; /* APIC address */ + unsigned int reserved; +}; + +/* Followed by entries */ + +#define MP_PROCESSOR 0 +#define MP_BUS 1 +#define MP_IOAPIC 2 +#define MP_INTSRC 3 +#define MP_LINTSRC 4 +#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */ + +struct mpc_config_processor +{ + unsigned char mpc_type; + unsigned char mpc_apicid; /* Local APIC number */ + unsigned char mpc_apicver; /* Its versions */ + unsigned char mpc_cpuflag; +#define CPU_ENABLED 1 /* Processor is available */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ + unsigned int mpc_cpufeature; +#define CPU_STEPPING_MASK 0x0F +#define CPU_MODEL_MASK 0xF0 +#define CPU_FAMILY_MASK 0xF00 + unsigned int mpc_featureflag; /* CPUID feature value */ + unsigned int mpc_reserved[2]; +}; + +struct mpc_config_bus +{ + unsigned char mpc_type; + unsigned char mpc_busid; + unsigned char mpc_bustype[6] __attribute((packed)); +}; + +/* List of Bus Type string values, Intel MP Spec. 
*/ +#define BUSTYPE_EISA "EISA" +#define BUSTYPE_ISA "ISA" +#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ +#define BUSTYPE_MCA "MCA" +#define BUSTYPE_VL "VL" /* Local bus */ +#define BUSTYPE_PCI "PCI" +#define BUSTYPE_PCMCIA "PCMCIA" +#define BUSTYPE_CBUS "CBUS" +#define BUSTYPE_CBUSII "CBUSII" +#define BUSTYPE_FUTURE "FUTURE" +#define BUSTYPE_MBI "MBI" +#define BUSTYPE_MBII "MBII" +#define BUSTYPE_MPI "MPI" +#define BUSTYPE_MPSA "MPSA" +#define BUSTYPE_NUBUS "NUBUS" +#define BUSTYPE_TC "TC" +#define BUSTYPE_VME "VME" +#define BUSTYPE_XPRESS "XPRESS" + +struct mpc_config_ioapic +{ + unsigned char mpc_type; + unsigned char mpc_apicid; + unsigned char mpc_apicver; + unsigned char mpc_flags; +#define MPC_APIC_USABLE 0x01 + unsigned int mpc_apicaddr; +}; + +struct mpc_config_intsrc +{ + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbus; + unsigned char mpc_srcbusirq; + unsigned char mpc_dstapic; + unsigned char mpc_dstirq; +}; + +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; + +#define MP_IRQDIR_DEFAULT 0 +#define MP_IRQDIR_HIGH 1 +#define MP_IRQDIR_LOW 3 + + +struct mpc_config_lintsrc +{ + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbusid; + unsigned char mpc_srcbusirq; + unsigned char mpc_destapic; +#define MP_APIC_ALL 0xFF + unsigned char mpc_destapiclint; +}; + +struct mp_config_oemtable +{ + char oem_signature[4]; +#define MPC_OEM_SIGNATURE "_OEM" + unsigned short oem_length; /* Size of table */ + char oem_rev; /* 0x01 */ + char oem_checksum; + char mpc_oem[8]; +}; + +struct mpc_config_translation +{ + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + +/* + * Default configurations + * + * 1 2 CPU ISA 82489DX + * 2 2 CPU EISA 82489DX neither IRQ 
0 timer nor IRQ 13 DMA chaining + * 3 2 CPU EISA 82489DX + * 4 2 CPU MCA 82489DX + * 5 2 CPU ISA+PCI + * 6 2 CPU EISA+PCI + * 7 2 CPU MCA+PCI + */ + +#ifdef CONFIG_MULTIQUAD +#define MAX_IRQ_SOURCES 512 +#else /* !CONFIG_MULTIQUAD */ +#define MAX_IRQ_SOURCES 256 +#endif /* CONFIG_MULTIQUAD */ + +#define MAX_MP_BUSSES 32 +enum mp_bustype { + MP_BUS_ISA = 1, + MP_BUS_EISA, + MP_BUS_PCI, + MP_BUS_MCA +}; +extern int *mp_bus_id_to_type; +extern int *mp_bus_id_to_node; +extern int *mp_bus_id_to_local; +extern int *mp_bus_id_to_pci_bus; +extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; + +extern unsigned int boot_cpu_physical_apicid; +/*extern unsigned long phys_cpu_present_map;*/ +extern int smp_found_config; +extern void find_smp_config (void); +extern void get_smp_config (void); +/*extern int nr_ioapics;*/ +extern int apic_version [MAX_APICS]; +/*extern int mp_irq_entries;*/ +/*extern struct mpc_config_intsrc *mp_irqs;*/ +/*extern int mpc_default_type;*/ +extern int mp_current_pci_id; +extern unsigned long mp_lapic_addr; +/*extern int pic_mode;*/ +extern int using_apic_timer; + +#ifdef CONFIG_ACPI_BOOT +extern void mp_register_lapic (u8 id, u8 enabled); +extern void mp_register_lapic_address (u64 address); + +#ifdef CONFIG_X86_IO_APIC +extern void mp_register_ioapic (u8 id, u32 address, u32 irq_base); +extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 global_irq); +extern void mp_config_acpi_legacy_irqs (void); +extern void mp_config_ioapic_for_sci(int irq); +extern void mp_parse_prt (void); +#else /*!CONFIG_X86_IO_APIC*/ +static inline void mp_config_ioapic_for_sci(int irq) { } +#endif /*!CONFIG_X86_IO_APIC*/ + +#endif /*CONFIG_ACPI_BOOT*/ + +#endif + diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h new file mode 100644 index 0000000000..2a938479b9 --- /dev/null +++ b/xen/include/asm-x86/msr.h @@ -0,0 +1,206 @@ +#ifndef __ASM_MSR_H +#define __ASM_MSR_H + +/* + * Access to machine-specific registers (available on 586 and 
better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + +#define rdmsrl(msr,val) do { unsigned long a__,b__; \ + __asm__ __volatile__("rdmsr" \ + : "=a" (a__), "=d" (b__) \ + : "c" (msr)); \ + val = a__ | (b__<<32); \ +} while(0); + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdtscl(low) \ + __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + +#ifdef x86_32 +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) +#else +#define rdtscll(val) do { \ + unsigned int a,d; \ + asm volatile("rdtsc" : "=a" (a), "=d" (d)); \ + (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \ +} while(0) +#endif + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + +/* symbolic names for some interesting MSRs */ +/* Intel defined MSRs. 
*/ +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +/* AMD/K8 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */ +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1< +typedef struct { unsigned long l1_lo; } l1_pgentry_t; +typedef struct { unsigned long l2_lo; } l2_pgentry_t; +typedef l1_pgentry_t *l1_pagetable_t; +typedef l2_pgentry_t *l2_pagetable_t; +typedef struct { unsigned long pt_lo; } pagetable_t; +#endif /* !__ASSEMBLY__ */ + +/* Strip type from a table entry. */ +#define l1_pgentry_val(_x) ((_x).l1_lo) +#define l2_pgentry_val(_x) ((_x).l2_lo) +#define pagetable_val(_x) ((_x).pt_lo) + +#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL)) +#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL)) + +/* Add type to a table entry. */ +#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } ) +#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } ) +#define mk_pagetable(_x) ( (pagetable_t) { (_x) } ) + +/* Turn a typed table entry into a page index. */ +#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT) +#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT) + +/* Turn a typed table entry into a physical address. 
*/ +#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK) +#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK) + +/* Dereference a typed level-2 entry to yield a typed level-1 table. */ +#define l2_pgentry_to_l1(_x) \ + ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK)) + +/* Given a virtual address, get an entry offset into a page table. */ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1)) +#define l2_table_offset(_a) \ + ((_a) >> L2_PAGETABLE_SHIFT) + +/* Hypervisor table entries use zero to signify 'empty'. */ +#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x)) +#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x)) + +#define __PAGE_OFFSET (0xFC400000) +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) +#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) +#define VALID_PAGE(page) ((page - frame_table) < max_mapnr) + +/* + * NB. We don't currently track I/O holes in the physical RAM space. + * For now we guess that I/O devices will be mapped in the first 1MB + * (e.g., VGA buffers) or beyond the end of physical RAM. + */ +#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page)) + +/* High table entries are reserved by the hypervisor. 
*/ +#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \ + (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) +#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \ + (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE) + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include + +#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START) +#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT)))) + +#define va_to_l1mfn(_va) (l2_pgentry_val(linear_l2_table[_va>>L2_PAGETABLE_SHIFT]) >> PAGE_SHIFT) + +extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE]; +extern void paging_init(void); + +#define __flush_tlb() \ + do { \ + __asm__ __volatile__ ( \ + "movl %%cr3, %%eax; movl %%eax, %%cr3" \ + : : : "memory", "eax" ); \ + tlb_clocktick(); \ + } while ( 0 ) + +/* Flush global pages as well. */ + +#define __pge_off() \ + do { \ + __asm__ __volatile__( \ + "movl %0, %%cr4; # turn off PGE " \ + :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \ + } while (0) + +#define __pge_on() \ + do { \ + __asm__ __volatile__( \ + "movl %0, %%cr4; # turn on PGE " \ + :: "r" (mmu_cr4_features)); \ + } while (0) + + +#define __flush_tlb_pge() \ + do { \ + __pge_off(); \ + __flush_tlb(); \ + __pge_on(); \ + } while (0) + +#define __flush_tlb_one(__addr) \ +__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) + +#endif /* !__ASSEMBLY__ */ + + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PAT 0x080 +#define _PAGE_PSE 0x080 +#define _PAGE_GLOBAL 0x100 + +#define __PAGE_HYPERVISOR \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_HYPERVISOR_NOCACHE \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) +#define __PAGE_HYPERVISOR_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +#define MAKE_GLOBAL(_x) ((_x) | 
_PAGE_GLOBAL) + +#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR) +#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO) +#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE) + +#define mk_l2_writeable(_p) \ + (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l2_readonly(_p) \ + (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW)) +#define mk_l1_writeable(_p) \ + (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l1_readonly(_p) \ + (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW)) + + +#ifndef __ASSEMBLY__ +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} +#endif + +#endif /* _I386_PAGE_H */ diff --git a/xen/include/asm-x86/param.h b/xen/include/asm-x86/param.h new file mode 100644 index 0000000000..efc29a8da2 --- /dev/null +++ b/xen/include/asm-x86/param.h @@ -0,0 +1,24 @@ +#ifndef _ASM_PARAM_H +#define _ASM_PARAM_H + +#ifndef HZ +#define HZ 100 +#endif + +#define EXEC_PAGESIZE 4096 + +#ifndef NGROUPS +#define NGROUPS 32 +#endif + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifdef __KERNEL__ +# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ +#endif + +#endif diff --git a/xen/include/asm-x86/pci.h b/xen/include/asm-x86/pci.h new file mode 100644 index 0000000000..1a217315a4 --- /dev/null +++ b/xen/include/asm-x86/pci.h @@ -0,0 +1,35 @@ +#ifndef __X86_PCI_H +#define __X86_PCI_H + +#include + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +#ifdef CONFIG_PCI +extern unsigned int pcibios_assign_all_busses(void); +#else +#define pcibios_assign_all_busses() 0 +#endif +#define pcibios_scan_all_fns() 0 + +extern unsigned 
long pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +void pcibios_config_init(void); +struct pci_bus * pcibios_scan_root(int bus); +extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value); +extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); + +void pcibios_set_master(struct pci_dev *dev); +void pcibios_penalize_isa_irq(int irq); +struct irq_routing_table *pcibios_get_irq_routing_table(void); +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); + +#include +#include +#include + +#endif /* __X86_PCI_H */ diff --git a/xen/include/asm-x86/pdb.h b/xen/include/asm-x86/pdb.h new file mode 100644 index 0000000000..2ed6a9a318 --- /dev/null +++ b/xen/include/asm-x86/pdb.h @@ -0,0 +1,88 @@ + +/* + * pervasive debugger + * www.cl.cam.ac.uk/netos/pdb + * + * alex ho + * 2004 + * university of cambridge computer laboratory + */ + + +#ifndef __PDB_H__ +#define __PDB_H__ + +#include +#include +#include +#include /* for domain id */ + +extern int pdb_initialized; +extern int pdb_com_port; +extern int pdb_high_bit; +extern int pdb_page_fault_possible; +extern int pdb_page_fault_scratch; +extern int pdb_page_fault; + +extern void initialize_pdb(void); + +/* Get/set values from generic debug interface. */ +extern int pdb_set_values(u_char *buffer, int length, + unsigned long cr3, unsigned long addr); +extern int pdb_get_values(u_char *buffer, int length, + unsigned long cr3, unsigned long addr); + +/* External entry points. */ +extern int pdb_handle_exception(int exceptionVector, + struct pt_regs *xen_regs); +extern int pdb_serial_input(u_char c, struct pt_regs *regs); +extern void pdb_do_debug(dom0_op_t *op); + +/* PDB Context. */ +struct pdb_context +{ + int valid; + int domain; + int process; + int system_call; /* 0x01 break on enter, 0x02 break on exit */ + unsigned long ptbr; +}; +extern struct pdb_context pdb_ctx; + +/* Breakpoints. 
*/ +struct pdb_breakpoint +{ + struct list_head list; + unsigned long address; + unsigned long cr3; + domid_t domain; +}; +extern void pdb_bkpt_add (unsigned long cr3, unsigned long address); +extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3, + unsigned long address); +extern int pdb_bkpt_remove (unsigned long cr3, unsigned long address); + +/* Conversions. */ +extern int hex (char); +extern char *mem2hex (char *, char *, int); +extern char *hex2mem (char *, char *, int); +extern int hexToInt (char **ptr, int *intValue); + +/* Temporary Linux specific definitions */ +extern int pdb_system_call; +extern unsigned char pdb_system_call_enter_instr; /* original enter instr */ +extern unsigned char pdb_system_call_leave_instr; /* original next instr */ +extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */ +extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */ + +unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid); +void pdb_linux_get_values(char *buffer, int length, unsigned long address, + int pid, unsigned long cr3); +void pdb_linux_set_values(char *buffer, int length, unsigned long address, + int pid, unsigned long cr3); +void pdb_linux_syscall_enter_bkpt (struct pt_regs *regs, long error_code, + trap_info_t *ti); +void pdb_linux_syscall_exit_bkpt (struct pt_regs *regs, + struct pdb_context *pdb_ctx); + +#endif /* __PDB_H__ */ diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h new file mode 100644 index 0000000000..823c6ca851 --- /dev/null +++ b/xen/include/asm-x86/processor.h @@ -0,0 +1,563 @@ +/* + * include/asm-i386/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_I386_PROCESSOR_H +#define __ASM_I386_PROCESSOR_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct task_struct; + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). 
+ */ +#define current_text_addr() \ + ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + char wp_works_ok; /* It doesn't on 386's */ + char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ + char hard_math; + char rfu; + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this + call */ + int fdiv_bug; + int f00f_bug; + int coma_bug; + unsigned long loops_per_jiffy; + unsigned long *pgd_quick; + unsigned long *pmd_quick; + unsigned long *pte_quick; + unsigned long pgtable_cache_sz; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_NEXGEN 4 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_RISE 6 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_NSC 8 +#define X86_VENDOR_SIS 9 +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ + +extern struct cpuinfo_x86 boot_cpu_data; +extern struct tss_struct init_tss[NR_CPUS]; + +#ifdef CONFIG_SMP +extern struct cpuinfo_x86 cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +extern char ignore_irq13; + +extern void identify_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); +extern void dodgy_tsc(void); + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* 
Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +/* + * Generic CPUID function + */ +static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op)); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax; + + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx; + + __asm__("cpuid" + : "=a" (eax), "=b" (ebx) + : "0" (op) + : "cx", "dx" ); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ecx; + + __asm__("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; +} + + +/* + * Intel CPU flags in CR0 + */ +#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */ +#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */ 
+#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */ +#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */ +#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */ +#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */ +#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */ +#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */ +#define X86_CR0_PG 0x80000000 /* Paging (RW) */ + +#define read_cr0() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr0,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) + +#define write_cr0(x) \ + __asm__("movl %0,%%cr0": :"r" (x)); + + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ + +/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up + * after us can get the correct flags. 
+ */ +extern unsigned long mmu_cr4_features; + +static inline void set_in_cr4 (unsigned long mask) +{ + mmu_cr4_features |= mask; + __asm__("movl %%cr4,%%eax\n\t" + "orl %0,%%eax\n\t" + "movl %%eax,%%cr4\n" + : : "irg" (mask) + :"ax"); +} + +static inline void clear_in_cr4 (unsigned long mask) +{ + mmu_cr4_features &= ~mask; + __asm__("movl %%cr4,%%eax\n\t" + "andl %0,%%eax\n\t" + "movl %%eax,%%cr4\n" + : : "irg" (~mask) + :"ax"); +} + + + +/* + * Cyrix CPU configuration register indexes + */ +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +/* + * Cyrix CPU indexed register access macros + */ + +#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + +#define EISA_bus (0) +#define MCA_bus (0) + +/* from system description table in BIOS. Mostly for MCA use, but +others may find it useful. */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; +extern unsigned int mca_pentium_flag; + +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE (PAGE_OFFSET) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) + +/* + * Size of io_bitmap in longwords: + * For Xen we support the full 8kbyte IO bitmap but use the io_bitmap_sel field + * of the task_struct to avoid a full 8kbyte copy when switching to / from + * domains with bits cleared. 
+ */ +#define IO_BITMAP_SIZE 2048 +#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) +#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 + +struct i387_fsave_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + long status; /* software status information */ +}; + +struct i387_fxsave_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +} __attribute__ ((aligned (16))); + +struct i387_soft_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + unsigned char ftop, changed, lookahead, no_update, rm, alimit; + struct info *info; + unsigned long entry_eip; +}; + +union i387_union { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; +}; + +typedef struct { + unsigned long seg; +} mm_segment_t; + +struct tss_struct { + unsigned short back_link,__blh; + unsigned long esp0; + unsigned short ss0,__ss0h; + unsigned long esp1; + unsigned short ss1,__ss1h; + unsigned long esp2; + unsigned short ss2,__ss2h; + unsigned long __cr3; + unsigned long eip; + unsigned long eflags; + unsigned long eax,ecx,edx,ebx; + unsigned long esp; + unsigned long ebp; + unsigned long esi; + unsigned long edi; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace, bitmap; + unsigned long io_bitmap[IO_BITMAP_SIZE+1]; + /* + * pads the TSS to be 
cacheline-aligned (total size is 0x2080) + */ + unsigned long __cacheline_filler[5]; +}; + +struct thread_struct { + unsigned long guestos_sp, guestos_ss; +/* Hardware debugging registers */ + unsigned long debugreg[8]; /* %%db0-7 debug registers */ +/* floating point info */ + union i387_union i387; +/* Trap info. */ + int fast_trap_idx; + struct desc_struct fast_trap_desc; + trap_info_t traps[256]; +}; + +#define IDT_ENTRIES 256 +extern struct desc_struct idt_table[]; +extern struct desc_struct *idt_tables[]; + +#define SET_DEFAULT_FAST_TRAP(_p) \ + (_p)->fast_trap_idx = 0x20; \ + (_p)->fast_trap_desc.a = 0; \ + (_p)->fast_trap_desc.b = 0; + +#define CLEAR_FAST_TRAP(_p) \ + (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ + 0, 8)) + +#ifdef XEN_DEBUGGER +#define SET_FAST_TRAP(_p) \ + (pdb_initialized ? (void *) 0 : \ + (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ + &((_p)->fast_trap_desc), 8))) +#else +#define SET_FAST_TRAP(_p) \ + (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \ + &((_p)->fast_trap_desc), 8)) +#endif + +long set_fast_trap(struct task_struct *p, int idx); + +#define INIT_THREAD { \ + 0, 0, \ + { [0 ... 7] = 0 }, /* debugging registers */ \ + { { 0, }, }, /* 387 state */ \ + 0x20, { 0, 0 }, /* DEFAULT_FAST_TRAP */ \ + { {0} } /* io permissions */ \ +} + +#define INIT_TSS { \ + 0,0, /* back_link, __blh */ \ + 0, /* esp0 */ \ + 0, 0, /* ss0 */ \ + 0,0,0,0,0,0, /* stack1, stack2 */ \ + 0, /* cr3 */ \ + 0,0, /* eip,eflags */ \ + 0,0,0,0, /* eax,ecx,edx,ebx */ \ + 0,0,0,0, /* esp,ebp,esi,edi */ \ + 0,0,0,0,0,0, /* es,cs,ss */ \ + 0,0,0,0,0,0, /* ds,fs,gs */ \ + 0,0, /* ldt */ \ + 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ + { [0 ... IO_BITMAP_SIZE] = ~0UL }, /* ioperm */ \ +} + +struct mm_struct { + /* + * Every domain has a L1 pagetable of its own. Per-domain mappings + * are put in this table (eg. the current GDT is mapped here). 
+ */ + l1_pgentry_t *perdomain_pt; + pagetable_t pagetable; + + /* shadow mode status and controls */ + unsigned int shadow_mode; /* flags to control shadow table operation */ + pagetable_t shadow_table; + spinlock_t shadow_lock; + unsigned int shadow_max_page_count; // currently unused + + /* shadow hashtable */ + struct shadow_status *shadow_ht; + struct shadow_status *shadow_ht_free; + struct shadow_status *shadow_ht_extras; /* extra allocation units */ + unsigned int shadow_extras_count; + + /* shadow dirty bitmap */ + unsigned long *shadow_dirty_bitmap; + unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */ + + /* shadow mode stats */ + unsigned int shadow_page_count; + unsigned int shadow_fault_count; + unsigned int shadow_dirty_count; + + + /* Current LDT details. */ + unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt; + /* Next entry is passed to LGDT on domain switch. */ + char gdt[6]; +}; + +static inline void write_ptbase(struct mm_struct *mm) +{ + unsigned long pa; + + if ( unlikely(mm->shadow_mode) ) + pa = pagetable_val(mm->shadow_table); + else + pa = pagetable_val(mm->pagetable); + + __asm__ __volatile__ ( "movl %0, %%cr3" : : "r" (pa) : "memory" ); +} + +#define IDLE0_MM \ +{ \ + perdomain_pt: 0, \ + pagetable: mk_pagetable(__pa(idle_pg_table)) \ +} + +/* Convenient accessor for mm.gdt. 
*/ +#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e)) +#define SET_GDT_ADDRESS(_p, _a) ((*(u32 *)((_p)->mm.gdt + 2)) = (_a)) +#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0))) +#define GET_GDT_ADDRESS(_p) ((*(u32 *)((_p)->mm.gdt + 2))) + +long set_gdt(struct task_struct *p, + unsigned long *frames, + unsigned int entries); + +long set_debugreg(struct task_struct *p, int reg, unsigned long value); + +struct microcode { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int reserved[5]; + unsigned int bits[500]; +}; + +/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ +#define MICROCODE_IOCFREE _IO('6',0) + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop"); +} + +#define cpu_relax() rep_nop() + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/xen/include/asm-x86/ptrace.h b/xen/include/asm-x86/ptrace.h new file mode 100644 index 0000000000..26269afcb0 --- /dev/null +++ b/xen/include/asm-x86/ptrace.h @@ -0,0 +1,51 @@ +#ifndef _I386_PTRACE_H +#define _I386_PTRACE_H + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + int xfs; + int xgs; + 
long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +enum EFLAGS { + EF_CF = 0x00000001, + EF_PF = 0x00000004, + EF_AF = 0x00000010, + EF_ZF = 0x00000040, + EF_SF = 0x00000080, + EF_TF = 0x00000100, + EF_IE = 0x00000200, + EF_DF = 0x00000400, + EF_OF = 0x00000800, + EF_IOPL = 0x00003000, + EF_IOPL_RING0 = 0x00000000, + EF_IOPL_RING1 = 0x00001000, + EF_IOPL_RING2 = 0x00002000, + EF_NT = 0x00004000, /* nested task */ + EF_RF = 0x00010000, /* resume */ + EF_VM = 0x00020000, /* virtual mode */ + EF_AC = 0x00040000, /* alignment */ + EF_VIF = 0x00080000, /* virtual interrupt */ + EF_VIP = 0x00100000, /* virtual interrupt pending */ + EF_ID = 0x00200000, /* id */ +}; + +#ifdef __KERNEL__ +#define user_mode(regs) ((3 & (regs)->xcs)) +#endif + +#endif diff --git a/xen/include/asm-x86/rwlock.h b/xen/include/asm-x86/rwlock.h new file mode 100644 index 0000000000..7519f32713 --- /dev/null +++ b/xen/include/asm-x86/rwlock.h @@ -0,0 +1,83 @@ +/* include/asm-x86/rwlock.h + * + * Helpers used by both rw spinlocks and rw semaphores. + * + * Based in part on code from semaphore.h and + * spinlock.h Copyright 1996 Linus Torvalds. + * + * Copyright 1999 Red Hat, Inc. + * + * Written by Benjamin LaHaise. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _ASM_X86_RWLOCK_H +#define _ASM_X86_RWLOCK_H + +#define RW_LOCK_BIAS 0x01000000 +#define RW_LOCK_BIAS_STR "0x01000000" + +#define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "js 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + ".previous" \ + ::"a" (rw) : "memory") + +#define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "js 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tpush"__OS" %%"__OP"ax\n\t" \ + "lea"__OS" %0,%%"__OP"ax\n\t" \ + "call " helper "\n\t" \ + "pop"__OS" %%"__OP"ax\n\t" \ + "jmp 1b\n" \ + ".previous" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#define __build_read_lock(rw, helper) do { \ + if (__builtin_constant_p(rw)) \ + __build_read_lock_const(rw, helper); \ + else \ + __build_read_lock_ptr(rw, helper); \ + } while (0) + +#define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + ".previous" \ + ::"a" (rw) : "memory") + +#define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tpush"__OS" %%"__OP"ax\n\t" \ + "lea"__OS" %0,%%"__OP"ax\n\t" \ + "call " helper "\n\t" \ + "pop"__OS" %%"__OP"ax\n\t" \ + "jmp 1b\n" \ + ".previous" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#define __build_write_lock(rw, helper) do { \ + if (__builtin_constant_p(rw)) \ + __build_write_lock_const(rw, helper); \ + else \ + __build_write_lock_ptr(rw, helper); \ + } while (0) + +#endif diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h new file mode 100644 index 0000000000..8d8bdcd06f --- /dev/null +++ b/xen/include/asm-x86/smp.h @@ -0,0 +1,120 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +/* + * We need the APIC definitions 
automatically as part of 'smp.h' + */ +#ifndef __ASSEMBLY__ +#include +/*#include */ +#include +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#ifndef __ASSEMBLY__ +#include +#include +#include +#ifdef CONFIG_X86_IO_APIC +#include +#endif +#include +#endif +#endif + +#ifdef CONFIG_SMP +#ifndef __ASSEMBLY__ + +/* + * Private routines/data + */ + +extern void smp_alloc_memory(void); +extern unsigned long phys_cpu_present_map; +extern unsigned long cpu_online_map; +extern volatile unsigned long smp_invalidate_needed; +extern int pic_mode; +extern int smp_num_siblings; +extern int cpu_sibling_map[]; + +extern void smp_flush_tlb(void); +extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); +extern void smp_send_reschedule(int cpu); +extern void smp_invalidate_rcv(void); /* Process an NMI */ +extern void (*mtrr_hook) (void); +extern void zap_low_mappings (void); + +/* + * On x86 all CPUs are mapped 1:1 to the APIC space. + * This simplifies scheduling and IPI sending and + * compresses data structures. + */ +static inline int cpu_logical_map(int cpu) +{ + return cpu; +} +static inline int cpu_number_map(int cpu) +{ + return cpu; +} + +/* + * Some lowlevel functions might want to know about + * the real APIC ID <-> CPU # mapping. + */ +#define MAX_APICID 256 +extern volatile int cpu_to_physical_apicid[NR_CPUS]; +extern volatile int physical_apicid_to_cpu[MAX_APICID]; +extern volatile int cpu_to_logical_apicid[NR_CPUS]; +extern volatile int logical_apicid_to_cpu[MAX_APICID]; + +/* + * General functions that each host system must provide. + */ + +/*extern void smp_boot_cpus(void);*/ +extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. 
+ */ + +#ifdef x86_32 +#define smp_processor_id() (current->processor) +#else +#include +#define smp_processor_id() read_pda(cpunumber) +#endif + +static __inline int hard_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_ID(*(unsigned *)(APIC_BASE+APIC_ID)); +} + +static __inline int logical_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_LOGICAL_ID(*(unsigned *)(APIC_BASE+APIC_LDR)); +} + +#endif /* !__ASSEMBLY__ */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +/* + * This magic constant controls our willingness to transfer + * a process across CPUs. Such a transfer incurs misses on the L1 + * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My + * gut feeling is this will vary by board in value. For a board + * with separate L2 cache it probably depends also on the RSS, and + * for a board with shared L2 cache it ought to decay fast as other + * processes are run. + */ + +#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ + +#endif +#endif diff --git a/xen/include/asm-x86/smpboot.h b/xen/include/asm-x86/smpboot.h new file mode 100644 index 0000000000..7a0b157114 --- /dev/null +++ b/xen/include/asm-x86/smpboot.h @@ -0,0 +1,132 @@ +#ifndef __ASM_SMPBOOT_H +#define __ASM_SMPBOOT_H + +/*emum for clustered_apic_mode values*/ +enum{ + CLUSTERED_APIC_NONE = 0, + CLUSTERED_APIC_XAPIC, + CLUSTERED_APIC_NUMAQ +}; + +#ifdef CONFIG_X86_CLUSTERED_APIC +extern unsigned int apic_broadcast_id; +extern unsigned char clustered_apic_mode; +extern unsigned char esr_disable; +extern unsigned char int_delivery_mode; +extern unsigned int int_dest_addr_mode; +extern int cyclone_setup(char*); + +static inline void detect_clustered_apic(char* oem, char* prod) +{ + /* + * Can't recognize Summit xAPICs at present, so use the OEM ID. 
+ */ + if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){ + clustered_apic_mode = CLUSTERED_APIC_XAPIC; + apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; + int_dest_addr_mode = APIC_DEST_PHYSICAL; + int_delivery_mode = dest_Fixed; + esr_disable = 1; + /*Start cyclone clock*/ + cyclone_setup(0); + /* check for ACPI tables */ + } else if (!strncmp(oem, "IBM", 3) && + (!strncmp(prod, "SERVIGIL", 8) || + !strncmp(prod, "EXA", 3) || + !strncmp(prod, "RUTHLESS", 8))){ + clustered_apic_mode = CLUSTERED_APIC_XAPIC; + apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; + int_dest_addr_mode = APIC_DEST_PHYSICAL; + int_delivery_mode = dest_Fixed; + esr_disable = 1; + /*Start cyclone clock*/ + cyclone_setup(0); + } else if (!strncmp(oem, "IBM NUMA", 8)){ + clustered_apic_mode = CLUSTERED_APIC_NUMAQ; + apic_broadcast_id = APIC_BROADCAST_ID_APIC; + int_dest_addr_mode = APIC_DEST_LOGICAL; + int_delivery_mode = dest_LowestPrio; + esr_disable = 1; + } +} +#define INT_DEST_ADDR_MODE (int_dest_addr_mode) +#define INT_DELIVERY_MODE (int_delivery_mode) +#else /* CONFIG_X86_CLUSTERED_APIC */ +#define apic_broadcast_id (APIC_BROADCAST_ID_APIC) +#define clustered_apic_mode (CLUSTERED_APIC_NONE) +#define esr_disable (0) +#define detect_clustered_apic(x,y) +#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */ +#define INT_DELIVERY_MODE (dest_LowestPrio) +#endif /* CONFIG_X86_CLUSTERED_APIC */ +#define BAD_APICID 0xFFu + +#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467) +#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469) + +#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid) + +extern unsigned char raw_phys_apicid[NR_CPUS]; + +/* + * How to map from the cpu_present_map + */ +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (clustered_apic_mode == CLUSTERED_APIC_XAPIC) + return raw_phys_apicid[mps_cpu]; 
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) + return (mps_cpu/4)*16 + (1<<(mps_cpu%4)); + return mps_cpu; +} + +static inline unsigned long apicid_to_phys_cpu_present(int apicid) +{ + if(clustered_apic_mode) + return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3)); + return 1UL << apicid; +} + +#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) ) + +/* + * Mappings between logical cpu number and logical / physical apicid + * The first four macros are trivial, but it keeps the abstraction consistent + */ +extern volatile int logical_apicid_2_cpu[]; +extern volatile int cpu_2_logical_apicid[]; +extern volatile int physical_apicid_2_cpu[]; +extern volatile int cpu_2_physical_apicid[]; + +#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] +#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu] +#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] +#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu] +#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */ +#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] +#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu] +#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */ +#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] +#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu] +#endif /* CONFIG_MULTIQUAD */ + +#ifdef CONFIG_X86_CLUSTERED_APIC +static inline int target_cpus(void) +{ + static int cpu; + switch(clustered_apic_mode){ + case CLUSTERED_APIC_NUMAQ: + /* Broadcast intrs to local quad only. 
*/ + return APIC_BROADCAST_ID_APIC; + case CLUSTERED_APIC_XAPIC: + /*round robin the interrupts*/ + cpu = (cpu+1)%smp_num_cpus; + return cpu_to_physical_apicid(cpu); + default: + } + return cpu_online_map; +} +#else +#define target_cpus() (cpu_online_map) +#endif +#endif diff --git a/xen/include/asm-x86/softirq.h b/xen/include/asm-x86/softirq.h new file mode 100644 index 0000000000..292baac6ea --- /dev/null +++ b/xen/include/asm-x86/softirq.h @@ -0,0 +1,17 @@ +#ifndef __ASM_SOFTIRQ_H +#define __ASM_SOFTIRQ_H + +#include +#include + +#define cpu_bh_enable(cpu) \ + do { barrier(); local_bh_count(cpu)--; } while (0) +#define cpu_bh_disable(cpu) \ + do { local_bh_count(cpu)++; barrier(); } while (0) + +#define local_bh_disable() cpu_bh_disable(smp_processor_id()) +#define local_bh_enable() cpu_bh_enable(smp_processor_id()) + +#define in_softirq() (local_bh_count(smp_processor_id()) != 0) + +#endif /* __ASM_SOFTIRQ_H */ diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h new file mode 100644 index 0000000000..5cbb5a413e --- /dev/null +++ b/xen/include/asm-x86/spinlock.h @@ -0,0 +1,205 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include +#include +#include +#include + +#if 0 +#define SPINLOCK_DEBUG 1 +#else +#define SPINLOCK_DEBUG 0 +#endif + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif +} spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#if SPINLOCK_DEBUG +#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC +#else +#define SPINLOCK_MAGIC_INIT /* */ +#endif + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. 
+ */ + +#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) +#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\t" \ + "cmpb $0,%0\n\t" \ + "rep;nop\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + ".previous" + +/* + * This works. Despite all the confusion. + * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) + +#define spin_unlock_string \ + "movb $1,%0" \ + :"=m" (lock->lock) : : "memory" + + +static inline void spin_unlock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#else + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->lock) \ + :"0" (oldval) : "memory" + +static inline void spin_unlock(spinlock_t *lock) +{ + char oldval = 1; +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#endif + +static inline int spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1" + :"=q" (oldval), "=m" (lock->lock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void spin_lock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + __label__ here; +here: + if (lock->magic != SPINLOCK_MAGIC) { +printk("eip: %p\n", &&here); + BUG(); + } +#endif + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->lock) : : "memory"); +} + + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. 
For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ +typedef struct { + volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif +} rwlock_t; + +#define RWLOCK_MAGIC 0xdeaf1eed + +#if SPINLOCK_DEBUG +#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT /* */ +#endif + +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } + +#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + +/* + * On x86, we implement read-write locks as a 32-bit counter + * with the high bit (sign) being the "contended" bit. + * + * The inline assembly is non-obvious. Think about it. + * + * Changed to use the same technique as rw semaphores. See + * semaphore.h for details. -ben + */ +/* the spinlock helpers are in arch/x86/kernel/semaphore.c */ + +static inline void read_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_read_lock(rw, "__read_lock_failed"); +} + +static inline void write_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_write_lock(rw, "__write_lock_failed"); +} + +#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") + +static inline int write_trylock(rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} + +#endif /* __ASM_SPINLOCK_H */ diff --git a/xen/include/asm-x86/string.h b/xen/include/asm-x86/string.h new file mode 100644 index 0000000000..bef20a71d5 --- /dev/null +++ b/xen/include/asm-x86/string.h @@ -0,0 +1,500 @@ +#ifndef _I386_STRING_H_ +#define _I386_STRING_H_ + +#ifdef __KERNEL__ +#include +/* + * On a 486 or Pentium, we are better 
off not using the + * byte string operations. But on a 386 or a PPro the + * byte string ops are faster than doing it by hand + * (MUCH faster on a Pentium). + * + * Also, the byte strings actually work correctly. Forget + * the i486 routines for now as they may be broken.. + */ +#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486) +#include +#else + +/* + * This string-include defines all string functions as inline + * functions. Use gcc. It also assumes ds=es=data space, this should be + * normal. Most of the string-functions are rather heavily hand-optimized, + * see especially strtok,strstr,str[c]spn. They should work, but are not + * very easy to understand. Everything is done entirely within the register + * set, making the functions fast and clean. String instructions have been + * used through-out, making for "slightly" unclear code :-) + * + * NO Copyright (C) 1991, 1992 Linus Torvalds, + * consider these trivial functions to be PD. + */ + + +#define __HAVE_ARCH_STRCPY +static inline char * strcpy(char * dest,const char *src) +{ +int d0, d1, d2; +__asm__ __volatile__( + "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2) + :"0" (src),"1" (dest) : "memory"); +return dest; +} + +#define __HAVE_ARCH_STRNCPY +static inline char * strncpy(char * dest,const char *src,size_t count) +{ +int d0, d1, d2, d3; +__asm__ __volatile__( + "1:\tdecl %2\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "rep\n\t" + "stosb\n" + "2:" + : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) + :"0" (src),"1" (dest),"2" (count) : "memory"); +return dest; +} + +#define __HAVE_ARCH_STRCAT +static inline char * strcat(char * dest,const char * src) +{ +int d0, d1, d2, d3; +__asm__ __volatile__( + "repne\n\t" + "scasb\n\t" + "decl %1\n" + "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src), "1" (dest), "2" (0), "3" 
(0xffffffff):"memory"); +return dest; +} + +#define __HAVE_ARCH_STRNCAT +static inline char * strncat(char * dest,const char * src,size_t count) +{ +int d0, d1, d2, d3; +__asm__ __volatile__( + "repne\n\t" + "scasb\n\t" + "decl %1\n\t" + "movl %8,%3\n" + "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %2,%2\n\t" + "stosb" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count) + : "memory"); +return dest; +} + +#define __HAVE_ARCH_STRCMP +static inline int strcmp(const char * cs,const char * ct) +{ +int d0, d1; +register int __res; +__asm__ __volatile__( + "1:\tlodsb\n\t" + "scasb\n\t" + "jne 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "jmp 3f\n" + "2:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "3:" + :"=a" (__res), "=&S" (d0), "=&D" (d1) + :"1" (cs),"2" (ct)); +return __res; +} + +#define __HAVE_ARCH_STRNCMP +static inline int strncmp(const char * cs,const char * ct,size_t count) +{ +register int __res; +int d0, d1, d2; +__asm__ __volatile__( + "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "scasb\n\t" + "jne 3f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %%eax,%%eax\n\t" + "jmp 4f\n" + "3:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "4:" + :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) + :"1" (cs),"2" (ct),"3" (count)); +return __res; +} + +#define __HAVE_ARCH_STRCHR +static inline char * strchr(const char * s, int c) +{ +int d0; +register char * __res; +__asm__ __volatile__( + "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "movl $1,%1\n" + "2:\tmovl %1,%0\n\t" + "decl %0" + :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); +return __res; +} + +#define __HAVE_ARCH_STRRCHR +static inline char * strrchr(const char * s, int c) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb 
%%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\ttestb %%al,%%al\n\t" + "jne 1b" + :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); +return __res; +} + +#define __HAVE_ARCH_STRLEN +static inline size_t strlen(const char * s) +{ +int d0; +register int __res; +__asm__ __volatile__( + "repne\n\t" + "scasb\n\t" + "notl %0\n\t" + "decl %0" + :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)); +return __res; +} + +static inline void * __memcpy(void * to, const void * from, size_t n) +{ +int d0, d1, d2; +__asm__ __volatile__( + "rep ; movsl\n\t" + "testb $2,%b4\n\t" + "je 1f\n\t" + "movsw\n" + "1:\ttestb $1,%b4\n\t" + "je 2f\n\t" + "movsb\n" + "2:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) + : "memory"); +return (to); +} + +/* + * This looks horribly ugly, but the compiler can optimize it totally, + * as the count is constant. + */ +static inline void * __constant_memcpy(void * to, const void * from, size_t n) +{ + switch (n) { + case 0: + return to; + case 1: + *(unsigned char *)to = *(const unsigned char *)from; + return to; + case 2: + *(unsigned short *)to = *(const unsigned short *)from; + return to; + case 3: + *(unsigned short *)to = *(const unsigned short *)from; + *(2+(unsigned char *)to) = *(2+(const unsigned char *)from); + return to; + case 4: + *(unsigned long *)to = *(const unsigned long *)from; + return to; + case 6: /* for Ethernet addresses */ + *(unsigned long *)to = *(const unsigned long *)from; + *(2+(unsigned short *)to) = *(2+(const unsigned short *)from); + return to; + case 8: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + return to; + case 12: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); + return to; + case 16: + *(unsigned long *)to = *(const unsigned 
long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); + *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); + return to; + case 20: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); + *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); + *(4+(unsigned long *)to) = *(4+(const unsigned long *)from); + return to; + } +#define COMMON(x) \ +__asm__ __volatile__( \ + "rep ; movsl" \ + x \ + : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ + : "0" (n/4),"1" ((long) to),"2" ((long) from) \ + : "memory"); +{ + int d0, d1, d2; + switch (n % 4) { + case 0: COMMON(""); return to; + case 1: COMMON("\n\tmovsb"); return to; + case 2: COMMON("\n\tmovsw"); return to; + default: COMMON("\n\tmovsw\n\tmovsb"); return to; + } +} + +#undef COMMON +} + +#define __HAVE_ARCH_MEMCPY + +#define memcpy(t, f, n) \ +(__builtin_constant_p(n) ? \ + __constant_memcpy((t),(f),(n)) : \ + __memcpy((t),(f),(n))) + + +/* + * struct_cpy(x,y), copy structure *x into (matching structure) *y. + * + * We get link-time errors if the structure sizes do not match. + * There is no runtime overhead, it's all optimized away at + * compile time. 
+ */ +//extern void __struct_cpy_bug (void); + +/* +#define struct_cpy(x,y) \ +({ \ + if (sizeof(*(x)) != sizeof(*(y))) \ + __struct_cpy_bug; \ + memcpy(x, y, sizeof(*(x))); \ +}) +*/ + +#define __HAVE_ARCH_MEMMOVE +static inline void * memmove(void * dest,const void * src, size_t n) +{ +int d0, d1, d2; +if (dest +#include + +/* Clear and set 'TS' bit respectively */ +#define clts() __asm__ __volatile__ ("clts") +#define stts() write_cr0(X86_CR0_TS|read_cr0()) + +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory"); + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + __asm__("lsll %1,%0" + :"=r" (__limit):"r" (segment)); + return __limit+1; +} + +#define nop() __asm__ __volatile__ ("nop") + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) + +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) + + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; +#ifdef x86_32 + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; +#else + case 4: + __asm__ __volatile__("xchgl %k0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 8: + __asm__ __volatile__("xchgq %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; +#endif + } + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. 
Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; +#ifdef x86_32 + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; +#else + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; +#endif + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + + +/* + * This function causes longword _o to be changed to _n at location _p. + * If this access causes a fault then we return 1, otherwise we return 0. + * If no fault occurs then _o is updated to teh value we saw at _p. If this + * is the same as the initial value of _o then _n is written to location _p. + */ +#define cmpxchg_user(_p,_o,_n) \ +({ \ + int _rc; \ + __asm__ __volatile__ ( \ + "1: " LOCK_PREFIX "cmpxchg"__OS" %2,%3\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $1,%1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=a" (_o), "=r" (_rc) \ + : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \ + : "memory"); \ + _rc; \ +}) + +/* + * Force strict CPU ordering. 
+ * And yes, this is required on UP too when we're talking + * to devices. + * + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ + +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() + +#ifdef CONFIG_X86_OOSTORE +#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#else +#define wmb() __asm__ __volatile__ ("": : :"memory") +#endif + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +/* interrupt control.. 
*/ +#ifdef x86_64 +#define __save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) +#define __restore_flags(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") +#else +#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */) +#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc") +#endif +#define __cli() __asm__ __volatile__("cli": : :"memory") +#define __sti() __asm__ __volatile__("sti": : :"memory") +/* used in the idle loop; sti takes one instruction cycle to complete */ +#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") + +/* For spinlocks etc */ +#ifdef x86_64 +#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) +#define local_irq_restore(x) __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory") +#else +#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#define local_irq_restore(x) __restore_flags(x) +#endif +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +static inline int local_irq_is_enabled(void) +{ + unsigned long flags; + __save_flags(flags); + return !!(flags & (1<<9)); /* EFLAGS_IF */ +} + +#ifdef CONFIG_SMP + +extern void __global_cli(void); +extern void __global_sti(void); +extern unsigned long __global_save_flags(void); +extern void __global_restore_flags(unsigned long); +#define cli() __global_cli() +#define sti() __global_sti() +#define save_flags(x) ((x)=__global_save_flags()) +#define restore_flags(x) __global_restore_flags(x) + +#else + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) 
__restore_flags(x) + +#endif + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +void disable_hlt(void); +void enable_hlt(void); + +#define BROKEN_ACPI_Sx 0x0001 +#define BROKEN_INIT_AFTER_S1 0x0002 + +#endif diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h new file mode 100644 index 0000000000..ed3a15bfb2 --- /dev/null +++ b/xen/include/asm-x86/time.h @@ -0,0 +1,21 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * + * Environment: Xen Hypervisor + * Description: Architecture dependent definition of time variables + */ + +#ifndef _ASM_TIME_H_ +#define _ASM_TIME_H_ + +#include +#include + +typedef s64 s_time_t; /* system time */ + +#endif /* _ASM_TIME_H_ */ diff --git a/xen/include/asm-x86/timex.h b/xen/include/asm-x86/timex.h new file mode 100644 index 0000000000..4b0a93fc87 --- /dev/null +++ b/xen/include/asm-x86/timex.h @@ -0,0 +1,58 @@ +/* + * linux/include/asm-i386/timex.h + * + * i386 architecture timex specifications + */ +#ifndef _ASMi386_TIMEX_H +#define _ASMi386_TIMEX_H + +#include +#include + +#ifdef CONFIG_MELAN +# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */ +#else +# define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#endif + +#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ +#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ + (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ + << (SHIFT_SCALE-SHIFT_HZ)) / HZ) + +/* + * Standard way to access the cycle counter on i586+ CPUs. + * Currently only used on SMP. 
+ * + * If you really have a SMP machine with i486 chips or older, + * compile for that, and this will just always return zero. + * That's ok, it just means that the nicer scheduling heuristics + * won't work for you. + * + * We only use the low 32 bits, and we'd simply better make sure + * that we reschedule before that wraps. Scheduling at least every + * four billion cycles just basically sounds like a good idea, + * regardless of how fast the machine is. + */ +typedef unsigned long long cycles_t; + +extern cycles_t cacheflush_time; + +static inline cycles_t get_cycles (void) +{ +#ifndef CONFIG_X86_TSC + return 0; +#else + unsigned long long ret; + + rdtscll(ret); + return ret; +#endif +} + +extern unsigned long cpu_khz; + +#define vxtime_lock() do {} while (0) +#define vxtime_unlock() do {} while (0) + +#endif diff --git a/xen/include/asm-x86/types.h b/xen/include/asm-x86/types.h new file mode 100644 index 0000000000..adb63db4d1 --- /dev/null +++ b/xen/include/asm-x86/types.h @@ -0,0 +1,58 @@ +#ifndef _X86_TYPES_H +#define _X86_TYPES_H + +typedef unsigned short umode_t; + +typedef unsigned int size_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +#ifdef x86_32 +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#else +typedef __signed__ long __s64; +typedef unsigned long __u64; +#endif +#endif + +#include + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +#ifdef x86_32 +typedef signed long long s64; +typedef unsigned long long u64; +#define BITS_PER_LONG 32 +#else +typedef signed long s64; +typedef unsigned long u64; +#define BITS_PER_LONG 64 +#endif + +/* DMA addresses come in generic and 64-bit flavours. */ + +typedef unsigned long dma_addr_t; +typedef u64 dma64_addr_t; + +#endif diff --git a/xen/include/asm-x86/uaccess.h b/xen/include/asm-x86/uaccess.h new file mode 100644 index 0000000000..bb2616336d --- /dev/null +++ b/xen/include/asm-x86/uaccess.h @@ -0,0 +1,600 @@ +#ifndef __i386_UACCESS_H +#define __i386_UACCESS_H + +/* + * User space memory access functions + */ +#include +#include +#include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. 
+ */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + + +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->addr_limit) +#define set_fs(x) (current->addr_limit = (x)) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +extern int __verify_write(const void *, unsigned long); + +#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) + +/* + * Uhhuh, this needs 33-bit arithmetic. We have a carry.. + */ +#define __range_ok(addr,size) ({ \ + unsigned long flag,sum; \ + asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ + :"=&r" (flag), "=r" (sum) \ + :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \ + flag; }) + +#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) + +static inline int verify_area(int type, const void * addr, unsigned long size) +{ + return access_ok(type,addr,size) ? 0 : -EFAULT; +} + + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +/* Returns 0 if exception not found and fixup otherwise. */ +extern unsigned long search_exception_table(unsigned long); + + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. 
We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the uglyness from the user. + * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). + */ + +extern void __get_user_1(void); +extern void __get_user_2(void); +extern void __get_user_4(void); + +#define __get_user_x(size,ret,x,ptr) \ + __asm__ __volatile__("call __get_user_" #size \ + :"=a" (ret),"=d" (x) \ + :"0" (ptr)) + +/* Careful: we have to cast the result to the type of the pointer for sign reasons */ +#define get_user(x,ptr) \ +({ int __ret_gu=1,__val_gu; \ + switch(sizeof (*(ptr))) { \ + case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \ + case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \ + case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \ + default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \ + /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ + /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) + +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); + +extern void __put_user_bad(void); + +#define put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) +#define __put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __put_user_nocheck(x,ptr,size) \ +({ \ + long __pu_err; \ + 
__put_user_size((x),(ptr),(size),__pu_err); \ + __pu_err; \ +}) + + +#define __put_user_check(x,ptr,size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) *__pu_addr = (ptr); \ + if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ + __put_user_size((x),__pu_addr,(size),__pu_err); \ + __pu_err; \ +}) + +#define __put_user_u64(x, addr, err) \ + __asm__ __volatile__( \ + "1: movl %%eax,0(%2)\n" \ + "2: movl %%edx,4(%2)\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,4b\n" \ + " .long 2b,4b\n" \ + ".previous" \ + : "=r"(err) \ + : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) + +#define __put_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ + case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ + case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \ + case 8: __put_user_u64(x,ptr,retval); break; \ + default: __put_user_bad(); \ + } \ +} while (0) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. 
+ */ +#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + + +#define __get_user_nocheck(x,ptr,size) \ +({ \ + long __gu_err, __gu_val; \ + __get_user_size(__gu_val,(ptr),(size),__gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +extern long __get_user_bad(void); + +#define __get_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \ + case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \ + case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \ + default: (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ + __asm__ __volatile__( \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err), ltype (x) \ + : "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + + +/* + * Copy To/From Userspace + */ + +/* Generic arbitrary sized copy. 
*/ +#define __copy_user(to,from,size) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ + : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +#define __copy_user_zeroing(to,from,size) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + "4: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ + : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +/* We let the __ versions of copy_from/to_user inline, because they're often + * used in fast paths and have only a small space overhead. + */ +static inline unsigned long +__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +{ + __copy_user_zeroing(to,from,n); + return n; +} + +static inline unsigned long +__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +{ + __copy_user(to,from,n); + return n; +} + + +/* Optimize just a little bit when we know the size of the move. 
*/ +#define __constant_copy_user(to, from, size) \ +do { \ + int __d0, __d1; \ + switch (size & 3) { \ + default: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1:\n" \ + ".section .fixup,\"ax\"\n" \ + "2: shl $2,%0\n" \ + " jmp 1b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,2b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 1: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: shl $2,%0\n" \ + "4: incl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 2: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: shl $2,%0\n" \ + "4: addl $2,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 3: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2: movsb\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: shl $2,%0\n" \ + "5: addl $2,%0\n" \ + "6: incl %0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,4b\n" \ + " .long 1b,5b\n" \ + " .long 2b,6b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + } \ +} while (0) + +/* Optimize just a little bit when we know the size of the move. 
*/ +#define __constant_copy_user_zeroing(to, from, size) \ +do { \ + int __d0, __d1; \ + switch (size & 3) { \ + default: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1:\n" \ + ".section .fixup,\"ax\"\n" \ + "2: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " jmp 1b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,2b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 1: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " incl %0\n" \ + " jmp 2b\n" \ + "4: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " incl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 2: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosw\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " addl $2,%0\n" \ + " jmp 2b\n" \ + "4: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosw\n" \ + " popl %%eax\n" \ + " addl $2,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 3: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: 
movsw\n" \ + "2: movsb\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosw\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " addl $3,%0\n" \ + " jmp 2b\n" \ + "5: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosw\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " addl $3,%0\n" \ + " jmp 2b\n" \ + "6: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " incl %0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,4b\n" \ + " .long 1b,5b\n" \ + " .long 2b,6b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + } \ +} while (0) + +unsigned long __generic_copy_to_user(void *, const void *, unsigned long); +unsigned long __generic_copy_from_user(void *, const void *, unsigned long); + +static inline unsigned long +__constant_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __constant_copy_user(to,from,n); + return n; +} + +static inline unsigned long +__constant_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + __constant_copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} + +static inline unsigned long +__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +{ + __constant_copy_user(to,from,n); + return n; +} + +static inline unsigned long +__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +{ + __constant_copy_user_zeroing(to,from,n); + return n; +} + +#define copy_to_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_to_user((to),(from),(n)) : \ + __generic_copy_to_user((to),(from),(n))) + +#define copy_from_user(to,from,n) \ + (__builtin_constant_p(n) ? 
\ + __constant_copy_from_user((to),(from),(n)) : \ + __generic_copy_from_user((to),(from),(n))) + +#define __copy_to_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_to_user_nocheck((to),(from),(n)) : \ + __generic_copy_to_user_nocheck((to),(from),(n))) + +#define __copy_from_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_from_user_nocheck((to),(from),(n)) : \ + __generic_copy_from_user_nocheck((to),(from),(n))) + +long strncpy_from_user(char *dst, const char *src, long count); +long __strncpy_from_user(char *dst, const char *src, long count); +#define strlen_user(str) strnlen_user(str, ~0UL >> 1) +long strnlen_user(const char *str, long n); +unsigned long clear_user(void *mem, unsigned long len); +unsigned long __clear_user(void *mem, unsigned long len); + +#endif /* __i386_UACCESS_H */ diff --git a/xen/include/asm-x86/unaligned.h b/xen/include/asm-x86/unaligned.h new file mode 100644 index 0000000000..08f8ca2d9b --- /dev/null +++ b/xen/include/asm-x86/unaligned.h @@ -0,0 +1,37 @@ +#ifndef __X86_UNALIGNED_H +#define __X86_UNALIGNED_H + +/* + * x86 can do unaligned accesses itself. + * + * The strange macros are there to make sure these can't + * be misused in a way that makes them not work on other + * architectures where unaligned accesses aren't as simple. + */ + +/** + * get_unaligned - get value from possibly mis-aligned location + * @ptr: pointer to value + * + * This macro should be used for accessing values larger in size than + * single bytes at locations that are expected to be improperly aligned, + * e.g. retrieving a u16 value from a location not u16-aligned. + * + * Note that unaligned accesses can be very expensive on some architectures. 
+ */ +#define get_unaligned(ptr) (*(ptr)) + +/** + * put_unaligned - put value to a possibly mis-aligned location + * @val: value to place + * @ptr: pointer to location + * + * This macro should be used for placing values larger in size than + * single bytes at locations that are expected to be improperly aligned, + * e.g. writing a u16 value to a location not u16-aligned. + * + * Note that unaligned accesses can be very expensive on some architectures. + */ +#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +#endif diff --git a/xen/include/asm-x86/x86_64/config.h b/xen/include/asm-x86/x86_64/config.h new file mode 100644 index 0000000000..5a0acabf2a --- /dev/null +++ b/xen/include/asm-x86/x86_64/config.h @@ -0,0 +1,292 @@ +/****************************************************************************** + * config.h + * + * A Linux-style configuration list. + * + */ + +#ifndef __XEN_X86_64_CONFIG_H__ +#define __XEN_X86_64_CONFIG_H__ + +#define CONFIG_X86 1 +#define CONFIG_X86_64BITMODE 1 + +#define CONFIG_SMP 1 +#define CONFIG_X86_LOCAL_APIC 1 +#define CONFIG_X86_IO_APIC 1 +#define CONFIG_X86_L1_CACHE_SHIFT 5 + +#define CONFIG_PCI 1 +#define CONFIG_PCI_BIOS 1 +#define CONFIG_PCI_DIRECT 1 + +#define CONFIG_IDE 1 +#define CONFIG_BLK_DEV_IDE 1 +#define CONFIG_BLK_DEV_IDEDMA 1 +#define CONFIG_BLK_DEV_IDEPCI 1 +#define CONFIG_IDEDISK_MULTI_MODE 1 +#define CONFIG_IDEDISK_STROKE 1 +#define CONFIG_IDEPCI_SHARE_IRQ 1 +#define CONFIG_BLK_DEV_IDEDMA_PCI 1 +#define CONFIG_IDEDMA_PCI_AUTO 1 +#define CONFIG_IDEDMA_AUTO 1 +#define CONFIG_IDEDMA_ONLYDISK 1 +#define CONFIG_BLK_DEV_IDE_MODES 1 +#define CONFIG_BLK_DEV_PIIX 1 + +#define CONFIG_SCSI 1 +#define CONFIG_SCSI_LOGGING 1 +#define CONFIG_BLK_DEV_SD 1 +#define CONFIG_SD_EXTRA_DEVS 40 +#define CONFIG_SCSI_MULTI_LUN 1 + +#define CONFIG_XEN_ATTENTION_KEY 1 + +#define HZ 100 + +/* + * Just to keep compiler happy. + * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!! 
+ * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-) + * Mmmm... so niiiiiice.... + */ +#define SMP_CACHE_BYTES 64 +#define NR_CPUS 16 +#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define ____cacheline_aligned __cacheline_aligned + +#define PHYSICAL_ADDRESS_BITS 52 +#define MAX_PHYSICAL_ADDRESS (1 << PHYSICAL_ADDRESS_BITS) +#define VIRTUAL_ADDRESS_BITS 48 +#define XEN_PAGE_SIZE 4096 + +#define PTE_SIZE 8 +#define TOTAL_PTES (512ULL * 512 * 512 * 512) + +/* next PML4 from an _END address */ +#define PML4_BITS 39 +#define PML4_SPACE (1ULL << PML4_BITS) + +/* + * Memory layout + * + * 0x0000000000000000 - 0x00007fffffffffff Guest & user apps (128TB) + * (Only for 32-bit guests) + * 0x00000000fc000000 - 0x00000000fc3fffff Machine/Physical 32-bit shadow (4MB) + * 0x00000000fc400000 - 0x00000000feffffff IO remap for 32-bit guests (44MB) + * 0x00000000ff000000 - 0x00000000ff3fffff 32-bit PTE shadow (4MB) + * + * 0xffff800000000000 - 0xffff807fffffffff Linear page table (512GB) + * 0xffff808000000000 - 0xffff80ffffffffff Reserved for shadow page table (512GB) + * + * 0xffff810000000000 - 0xffff82ffffffffff Xen PML4 slots + * 0xffff810000000000 - 0xffff81003fffffff Xen hypervisor virtual space (1GB) + * 0xffff810040000000 - 0xffff81807fffffff Per-domain mappings (1GB) + * 0xffff810080000000 - 0xffff81387fffffff R/O physical map (224GB) + * 0xffff813880000000 - 0xffff81707fffffff R/W physical map (224GB) + * 0xffff817080000000 - 0xffff82c07fffffff Frame table (1344GB) + * 0xffff82c080000000 - 0xffff82c0bfffffff I/O remap space (1GB) + * 0xffff82c0c0000000 - 0xffff82ffffffffff (253GB) + * + * 0xffff830000000000 - 0xffff87ffffffffff RESERVED (5TB) + * + * 0xffff880000000000 - ... 
Physical 1:1 direct mapping (112TB max) + * 0xffff880000000000 - 0xffff880001000000 Low memory DMA region (16M) + * + * 0xfffff80000000000 - 0xffffffffffffffff Reserved for guest (8TB) + * + * The requirement that we have a 1:1 map of physical memory limits + * the maximum memory size we can support. With only 48 virtual address + * bits, and the assumption that guests will run users in positive address + * space, a contiguous 1:1 map can only live in the negative address space. + * Since we don't want to bump guests out of the very top of memory and + * force relocation, we can't use this entire space, and Xen has several + * heavy mapping that require PML4 slices. Just to be safe, we reserve + * 16 PML4s each for Xen and the guest. 224 PML4s give us 112 terabytes + * of addressable memory. Any high device physical addresses beyond this + * region can be mapped into the IO remap space or some of the reserved + * 6TB region. + * + * 112 TB is just 16 TB shy of the maximum physical memory supported + * on Linux 2.6.0, and should be enough for anybody. + * + * There are some additional constraints in the memory layout that require + * several changes from the i386 architecture. + * + * ACPI data and ACPI non-volatile storage must be placed in some region + * of memory below the 4GB mark. Depending on the BIOS and system, we + * may have this located as low as 1GB. This means allocating large + * chunks of physically contiguous memory from the direct mapping may not + * be possible. + * + * The full frame table for 112TB of physical memory currently occupies + * 1344GB space. This clearly can not be allocated in physically contiguous + * space, so it must be moved to a virtual address. + * + * Both copies of the machine->physical table must also be relocated. + * (112 TB / 4k) * 8 bytes means that each copy of the physical map requires + * 224GB of space, thus it also must move to VM space. 
+ * + * The physical pages used to allocate the page tables for the direct 1:1 + * map may occupy (112TB / 2M) * 8 bytes = 448MB. This is almost guaranteed + * to fit in contiguous physical memory, but these pages used to be allocated + * in the Xen monitor address space. This means the Xen address space must + * accommodate up to ~500 MB, which means it also must move out of the + * direct mapped region. + * + * Since both copies of the MPT, the frame table, and Xen now exist in + * purely virtual space, we have the added advantage of being able to + * map them to local pages on NUMA machines, or use NUMA aware memory + * allocation within Xen itself. + * + * Additionally, the 1:1 page table now exists contiguously in virtual + * space, but may be mapped to physically separated pages, allowing + * each node to contain the page tables for its own local memory. Setting + * up this mapping presents a bit of a chicken-egg problem, but is possible + * as a future enhancement. + * + * Zachary Amsden (zamsden@cisco.com) + * + */ + +/* Guest and user space */ +#define NSPACE_VIRT_START 0 +#define NSPACE_VIRT_END (1ULL << (VIRTUAL_ADDRESS_BITS - 1)) + +/* Privileged space */ +#define ESPACE_VIRT_END 0 +#define ESPACE_VIRT_START (ESPACE_VIRT_END-(1ULL << (VIRTUAL_ADDRESS_BITS-1))) + +/* reservations in e-space */ +#define GUEST_RESERVED_PML4S 16 +#define XEN_RESERVED_PML4S 16 + +#define MAX_MEMORY_SIZE ((1ULL << (VIRTUAL_ADDRESS_BITS-1)) \ + -((GUEST_RESERVED_PML4S + XEN_RESERVED_PML4S) * PML4_SPACE)) +#define MAX_MEMORY_FRAMES (MAX_MEMORY_SIZE / XEN_PAGE_SIZE) + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. + */ +#define HYPERVISOR_VIRT_START ESPACE_VIRT_START +#define HYPERVISOR_VIRT_END (ESPACE_VIRT_END-(GUEST_RESERVED_PML4S * PML4_SPACE)) + +/* First 512GB of virtual address space is used as a linear p.t. mapping. 
*/ +#define LINEAR_PT_VIRT_START (HYPERVISOR_VIRT_START) +#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES)) + +/* Reserve some space for a shadow PT mapping */ +#define SHADOW_PT_VIRT_START (LINEAR_PT_VIRT_END) +#define SHADOW_PT_VIRT_END (SHADOW_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES)) + +/* Xen exists in the first 1GB of the next PML4 space */ +#define MAX_MONITOR_ADDRESS (1 * 1024 * 1024 * 1024) +#define MONITOR_VIRT_START (SHADOW_PT_VIRT_END) +#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS) + +/* Next 1GB of virtual address space used for per-domain mappings (eg. GDT). */ +#define PERDOMAIN_VIRT_START (MONITOR_VIRT_END) +#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (512 * 512 * 4096)) +#define GDT_VIRT_START (PERDOMAIN_VIRT_START) +#define GDT_VIRT_END (GDT_VIRT_START + (128*1024)) +#define LDT_VIRT_START (GDT_VIRT_END) +#define LDT_VIRT_END (LDT_VIRT_START + (128*1024)) + +/* + * First set of MPTs are mapped read-only for all. It's for the machine->physical + * mapping table (MPT table). The following are virtual addresses. + */ +#define READONLY_MPT_VIRT_START (PERDOMAIN_VIRT_END) +#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES)) + +/* R/W machine->physical table */ +#define RDWR_MPT_VIRT_START (READONLY_MPT_VIRT_END) +#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES)) + +/* Frame table */ +#define FRAMETABLE_ENTRY_SIZE (48) +#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END) +#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + (FRAMETABLE_ENTRY_SIZE * MAX_MEMORY_FRAMES)) + +/* Next 1GB of virtual address space used for ioremap(). */ +#define IOREMAP_VIRT_START (FRAMETABLE_VIRT_END) +#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (512 * 512 * 4096)) + +/* And the virtual addresses for the direct-map region... 
 */ +#define DIRECTMAP_VIRT_START (ESPACE_VIRT_START + (XEN_RESERVED_PML4S * PML4_SPACE)) +#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS) + +/* + * Next is the direct-mapped memory region. The following are machine addresses. + */ +#define MAX_DMA_ADDRESS (16*1024*1024) +#define MAX_DIRECTMAP_ADDRESS MAX_MEMORY_SIZE + + + +/* + * Amount of slack domain memory to leave in system, in kilobytes. + * Prevents a hard out-of-memory crunch for things like network receive. + */ +#define SLACK_DOMAIN_MEM_KILOBYTES 2048 + + +/* + * These will probably change in the future.. + * locations for 32-bit guest compatibility mappings + */ + +/* 4M of 32-bit machine-physical shadow in low 4G of VM space */ +#define SHADOW_MPT32_VIRT_START (0xfc000000) +#define SHADOW_MPT32_VIRT_END (SHADOW_MPT32_VIRT_START + (4 * 1024 * 1024)) + +/* 44M of I/O remap for 32-bit drivers */ +#define IOREMAP_LOW_VIRT_START (SHADOW_MPT32_VIRT_END) +#define IOREMAP_LOW_VIRT_END (IOREMAP_LOW_VIRT_START + (44 * 1024 * 1024)) + +/* 4M of 32-bit page table */ +#define SHADOW_PT32_VIRT_START (IOREMAP_LOW_VIRT_END) +#define SHADOW_PT32_VIRT_END (SHADOW_PT32_VIRT_START + (4 * 1024 * 1024)) + + +/* Linkage for x86 */ +#define FASTCALL(x) x __attribute__((regparm(3))) +#define asmlinkage __attribute__((regparm(0))) +#define __ALIGN .align 16,0x90 +#define __ALIGN_STR ".align 16,0x90" +#define SYMBOL_NAME_STR(X) #X +#define SYMBOL_NAME(X) X +#define SYMBOL_NAME_LABEL(X) X##: +#ifdef __ASSEMBLY__ +#define ALIGN __ALIGN +#define ALIGN_STR __ALIGN_STR +#define ENTRY(name) \ + .globl SYMBOL_NAME(name); \ + ALIGN; \ + SYMBOL_NAME_LABEL(name) +#endif + +#define PGT_base_page_table PGT_l4_page_table + +#define barrier() __asm__ __volatile__("": : :"memory") + +/* + * Hypervisor segment selectors + */ +#define __HYPERVISOR_CS64 0x0810 +#define __HYPERVISOR_CS32 0x0808 +#define __HYPERVISOR_DS 0x0818 + +#define NR_syscalls 256 + +#ifndef NDEBUG +#define MEMORY_GUARD +#endif + +#ifndef __ASSEMBLY__ 
+extern unsigned long _end; /* standard ELF symbol */ +extern void __out_of_line_bug(int line) __attribute__((noreturn)); +#define out_of_line_bug() __out_of_line_bug(__LINE__) +#endif /* __ASSEMBLY__ */ + +#endif /* __XEN_X86_64_CONFIG_H__ */ diff --git a/xen/include/asm-x86/x86_64/current.h b/xen/include/asm-x86/x86_64/current.h new file mode 100644 index 0000000000..d5ffb0720a --- /dev/null +++ b/xen/include/asm-x86/x86_64/current.h @@ -0,0 +1,63 @@ +#ifndef _X86_64_CURRENT_H +#define _X86_64_CURRENT_H + +#if !defined(__ASSEMBLY__) +struct task_struct; + +#include + +#define STACK_RESERVED \ + (sizeof(execution_context_t)) + +static inline struct task_struct * get_current(void) +{ + struct task_struct *current; + current = read_pda(pcurrent); + return current; +} + +#define current get_current() + +static inline void set_current(struct task_struct *p) +{ + write_pda(pcurrent,p); +} + +static inline execution_context_t *get_execution_context(void) +{ + execution_context_t *execution_context; + __asm__( "andq %%rsp,%0; addq %2,%0" + : "=r" (execution_context) + : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); + return execution_context; +} + +static inline unsigned long get_stack_top(void) +{ + unsigned long p; + __asm__ ( "orq %%rsp,%0; andq $~7,%0" + : "=r" (p) : "0" (STACK_SIZE-8) ); + return p; +} + +#define schedule_tail(_p) \ + __asm__ __volatile__ ( \ + "andq %%rsp,%0; addq %2,%0; movq %0,%%rsp; jmp *%1" \ + : : "r" (~(STACK_SIZE-1)), \ + "r" (unlikely(is_idle_task((_p))) ? 
\ + continue_cpu_idle_loop : \ + continue_nonidle_task), \ + "i" (STACK_SIZE-STACK_RESERVED) ) + + +#else + +#ifndef ASM_OFFSET_H +#include +#endif + +#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg + +#endif + +#endif /* !(_X86_64_CURRENT_H) */ diff --git a/xen/include/asm-x86/x86_64/desc.h b/xen/include/asm-x86/x86_64/desc.h new file mode 100644 index 0000000000..e8556e976e --- /dev/null +++ b/xen/include/asm-x86/x86_64/desc.h @@ -0,0 +1,118 @@ +#ifndef __ARCH_DESC_H +#define __ARCH_DESC_H + +#define LDT_ENTRY_SIZE 16 + +#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY + +#define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) + +#define __CPU_DESC_INDEX(x,field) \ + ((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (__FIRST_PER_CPU_ENTRY*8)) +#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) + +#define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss))); +#define __load_LDT(cpu) asm volatile("lldt %w0"::"r" (__CPU_DESC_INDEX(cpu, ldt))); +#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0)) + +/* + * Guest OS must provide its own code selectors, or use the one we provide. The + * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector + * value is okay. Note that checking only the RPL is insufficient: if the + * selector is poked into an interrupt, trap or call gate then the RPL is + * ignored when the gate is accessed. + */ +#define VALID_SEL(_s) \ + (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \ + (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ + ((_s)&4)) && \ + (((_s)&3) == 0)) +#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS || VALID_SEL(_s)) + +/* These are bitmasks for the first 32 bits of a descriptor table entry. 
*/ +#define _SEGMENT_TYPE (15<< 8) +#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */ +#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */ +#define _SEGMENT_P ( 1<<15) /* Segment Present */ +#define _SEGMENT_G ( 1<<23) /* Granularity */ + +#ifndef __ASSEMBLY__ + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, +}; + +// 16byte gate +struct gate_struct { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +// 8 byte segment descriptor +struct desc_struct { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; + unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; +} __attribute__((packed)); + +// LDT or TSS descriptor in the GDT. 16 bytes. +struct ldttss_desc { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +// Union of above structures +union desc_union { + struct desc_struct seg; + struct ldttss_desc ldttss; + struct gate_struct gate; +}; + +struct per_cpu_gdt { + struct ldttss_desc tss; + struct ldttss_desc ldt; +} ____cacheline_aligned; + + +struct Xgt_desc_struct { + unsigned short size; + unsigned long address; +} __attribute__((packed)); + +extern __u8 gdt_table[]; +extern __u8 gdt_end[]; +extern union desc_union *gdt; + +extern struct per_cpu_gdt gdt_cpu_table[]; + +#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long)(x) >> 32) + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, +}; + +extern struct gate_struct *idt; + +#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) +#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) + +extern void set_intr_gate(unsigned int irq, void * addr); +extern void 
set_tss_desc(unsigned int n, void *addr); + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/xen/include/asm-x86/x86_64/ldt.h b/xen/include/asm-x86/x86_64/ldt.h new file mode 100644 index 0000000000..e0f139829e --- /dev/null +++ b/xen/include/asm-x86/x86_64/ldt.h @@ -0,0 +1,39 @@ +#ifndef __ARCH_LDT_H +#define __ARCH_LDT_H + +#ifndef __ASSEMBLY__ + +static inline void load_LDT(struct task_struct *p) +{ + unsigned long ents; + + if ( (ents = p->mm.ldt_ents) == 0 ) + { + __asm__ __volatile__ ( "lldt %w0" : : "r" (0) ); + } + else + { + unsigned int cpu; + struct ldttss_desc *desc; + + cpu = smp_processor_id(); + desc = (struct ldttss_desc *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt)); + desc->limit0 = ents*8-1; + desc->base0 = LDT_VIRT_START&0xffff; + desc->base1 = (LDT_VIRT_START&0xff0000)>>16; + desc->type = DESC_LDT; + desc->dpl = 0; + desc->p = 1; + desc->limit1 = 0; + desc->zero0 = 0; + desc->g = 0; + desc->base2 = (LDT_VIRT_START&0xff000000)>>24; + desc->base3 = LDT_VIRT_START>>32; + desc->zero1 = 0; + __load_LDT(cpu); + } +} + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h new file mode 100644 index 0000000000..cb8651ec8a --- /dev/null +++ b/xen/include/asm-x86/x86_64/page.h @@ -0,0 +1,305 @@ +#ifndef _X86_64_PAGE_H +#define _X86_64_PAGE_H + +#define BUG() do { \ + printk("BUG at %s:%d\n", __FILE__, __LINE__); \ + __asm__ __volatile__("ud2"); \ +} while (0) + +#define __PHYSICAL_MASK 0x0000ffffffffffffUL +#define PHYSICAL_PAGE_MASK 0x0000fffffffff000UL +#define PTE_MASK PHYSICAL_PAGE_MASK + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#ifdef __ASSEMBLY__ +#define PAGE_SIZE (0x1 << PAGE_SHIFT) +#else +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#endif +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) +#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) + +#define L1_PAGETABLE_SHIFT 12 +#define L2_PAGETABLE_SHIFT 21 +#define 
L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 +#define LARGE_PFN (LARGE_PAGE_SIZE / PAGE_SIZE) + +#define ENTRIES_PER_L1_PAGETABLE 512 +#define ENTRIES_PER_L2_PAGETABLE 512 +#define ENTRIES_PER_L3_PAGETABLE 512 +#define ENTRIES_PER_L4_PAGETABLE 512 + +#define KERNEL_TEXT_SIZE (40UL*1024*1024) +#define KERNEL_TEXT_START 0xffffffff80000000UL + +/* Changing the next two defines should be enough to increase the kernel stack */ +/* We still hope 8K is enough, but ... */ +#define THREAD_ORDER 1 +#define THREAD_SIZE (2*PAGE_SIZE) + +#define INIT_TASK_SIZE THREAD_SIZE +#define CURRENT_MASK (~(THREAD_SIZE-1)) + +#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE) +#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE) + +#ifndef __ASSEMBLY__ +#include +typedef struct { unsigned long l1_lo; } l1_pgentry_t; +typedef struct { unsigned long l2_lo; } l2_pgentry_t; +typedef struct { unsigned long l3_lo; } l3_pgentry_t; +typedef struct { unsigned long l4_lo; } l4_pgentry_t; +typedef l1_pgentry_t *l1_pagetable_t; +typedef l2_pgentry_t *l2_pagetable_t; +typedef l3_pgentry_t *l3_pagetable_t; +typedef l4_pgentry_t *l4_pagetable_t; +typedef struct { unsigned long pt_lo; } pagetable_t; +typedef struct { unsigned long pgprot; } pgprot_t; +#endif /* !__ASSEMBLY__ */ + +/* Strip type from a table entry. */ +#define l1_pgentry_val(_x) ((_x).l1_lo) +#define l2_pgentry_val(_x) ((_x).l2_lo) +#define l3_pgentry_val(_x) ((_x).l3_lo) +#define l4_pgentry_val(_x) ((_x).l4_lo) +#define pagetable_val(_x) ((_x).pt_lo) + +#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL)) +#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL)) +#define alloc_l3_pagetable() ((l3_pgentry_t *)get_free_page(GFP_KERNEL)) +#define alloc_l4_pagetable() ((l4_pgentry_t *)get_free_page(GFP_KERNEL)) + +/* Add type to a table entry. 
*/ +#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } ) +#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } ) +#define mk_l3_pgentry(_x) ( (l3_pgentry_t) { (_x) } ) +#define mk_l4_pgentry(_x) ( (l4_pgentry_t) { (_x) } ) +#define mk_pagetable(_x) ( (pagetable_t) { (_x) } ) + +/* Turn a typed table entry into a page index. */ +#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT) +#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT) +#define l3_pgentry_to_pagenr(_x) (l3_pgentry_val(_x) >> PAGE_SHIFT) +#define l4_pgentry_to_pagenr(_x) (l4_pgentry_val(_x) >> PAGE_SHIFT) + +/* Turn a typed table entry into a physical address. */ +#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK) +#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK) +#define l3_pgentry_to_phys(_x) (l3_pgentry_val(_x) & PAGE_MASK) +#define l4_pgentry_to_phys(_x) (l4_pgentry_val(_x) & PAGE_MASK) + +/* Dereference a typed level-2 entry to yield a typed level-1 table. */ +#define l2_pgentry_to_l1(_x) \ + ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK)) + +/* Dereference a typed level-4 entry to yield a typed level-3 table. */ +#define l4_pgentry_to_l3(_x) \ + ((l3_pgentry_t *)__va(l4_pgentry_val(_x) & PAGE_MASK)) + +/* Dereference a typed level-3 entry to yield a typed level-2 table. */ +#define l3_pgentry_to_l2(_x) \ + ((l2_pgentry_t *)__va(l3_pgentry_val(_x) & PAGE_MASK)) + +/* Given a virtual address, get an entry offset into a page table. */ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1)) +#define l2_table_offset(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT) & (ENTRIES_PER_L2_PAGETABLE - 1)) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (ENTRIES_PER_L3_PAGETABLE - 1)) +#define l4_table_offset(_a) \ + ((_a) >> L4_PAGETABLE_SHIFT) + +/* Hypervisor table entries use zero to sugnify 'empty'. 
*/ +#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x)) +#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x)) +#define l3_pgentry_empty(_x) (!l3_pgentry_val(_x)) +#define l4_pgentry_empty(_x) (!l4_pgentry_val(_x)) + + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* + * NB. We don't currently track I/O holes in the physical RAM space. + * For now we guess that I/O devices will be mapped in the first 1MB + * (e.g., VGA buffers) or beyond the end of physical RAM. + */ +#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page)) + +/* High table entries are reserved by the hypervisor. */ +#define DOMAIN_ENTRIES_PER_L4_PAGETABLE \ + (HYPERVISOR_VIRT_START >> L4_PAGETABLE_SHIFT) +#define HYPERVISOR_ENTRIES_PER_L4_PAGETABLE \ + (ENTRIES_PER_L4_PAGETABLE - DOMAIN_ENTRIES_PER_L4_PAGETABLE) + +#define __START_KERNEL 0xffffffff80100000 +#define __START_KERNEL_map 0xffffffff80000000 +#define __PAGE_OFFSET 0x0000010000000000 +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include + +extern unsigned long vm_stack_flags, vm_stack_flags32; +extern unsigned long vm_data_default_flags, vm_data_default_flags32; +extern unsigned long vm_force_exec32; + +#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START) + +extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE]; +extern void paging_init(void); + +#define __flush_tlb() \ + do { \ + __asm__ __volatile__ ( \ + "movl %%cr3, %%eax; movl %%eax, %%cr3" \ + : : : "memory", "eax" ); \ + tlb_clocktick(); \ + } while ( 0 ) + +/* Flush global pages as well. 
*/ + +#define __pge_off() \ + do { \ + __asm__ __volatile__( \ + "movl %0, %%cr4; # turn off PGE " \ + :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \ + } while (0) + +#define __pge_on() \ + do { \ + __asm__ __volatile__( \ + "movl %0, %%cr4; # turn off PGE " \ + :: "r" (mmu_cr4_features)); \ + } while (0) + + +#define __flush_tlb_pge() \ + do { \ + __pge_off(); \ + __flush_tlb(); \ + __pge_on(); \ + } while (0) + +#define __flush_tlb_one(__addr) \ +__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) + +#include + +/* + * Tell the user there is some problem. The exception handler decodes this frame. + */ +struct bug_frame { + unsigned char ud2[2]; + char *filename; /* should use 32bit offset instead, but the assembler doesn't like it */ + unsigned short line; +} __attribute__((packed)); +#define HEADER_BUG() asm volatile("ud2 ; .quad %P1 ; .short %P0" :: "i"(__LINE__), \ + "i" (__stringify(__FILE__))) +#define PAGE_BUG(page) BUG() + +#endif /* ASSEMBLY */ + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PAT 0x080 +#define _PAGE_PSE 0x080 +#define _PAGE_GLOBAL 0x100 + +#define __PAGE_HYPERVISOR \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_HYPERVISOR_NOCACHE \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) +#define __PAGE_HYPERVISOR_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL) + +#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR) +#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO) +#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE) + +#define mk_l4_writeable(_p) \ + (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l4_readonly(_p) \ + (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) & ~_PAGE_RW)) +#define mk_l3_writeable(_p) \ + (*(_p) = 
mk_l3_pgentry(l3_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l3_readonly(_p) \ + (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) & ~_PAGE_RW)) +#define mk_l2_writeable(_p) \ + (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l2_readonly(_p) \ + (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW)) +#define mk_l1_writeable(_p) \ + (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW)) +#define mk_l1_readonly(_p) \ + (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW)) + +/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. + Otherwise you risk miscompilation. */ +#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) +/* __pa_symbol should use for C visible symbols, but only for them. + This seems to be the official gcc blessed way to do such arithmetic. */ +#define __pa_symbol(x) \ + ({unsigned long v; \ + asm("" : "=r" (v) : "0" (x)); \ + v - __START_KERNEL_map; }) +#define __pa_maybe_symbol(x) \ + ({unsigned long v; \ + asm("" : "=r" (v) : "0" (x)); \ + __pa(v); }) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#ifndef CONFIG_DISCONTIGMEM +#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) +#define pfn_to_page(pfn) (frame_table + (pfn)) +#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) +#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr) +#endif + +#ifndef __ASSEMBLY__ +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} +#endif + +#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) + +#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define 
VM_DATA_DEFAULT_FLAGS \ + ((current->thread.flags & THREAD_IA32) ? vm_data_default_flags32 : \ + vm_data_default_flags) +#define VM_STACK_FLAGS vm_stack_flags + +#endif /* _X86_64_PAGE_H */ diff --git a/xen/include/asm-x86/x86_64/pda.h b/xen/include/asm-x86/x86_64/pda.h new file mode 100644 index 0000000000..b9ca345ee4 --- /dev/null +++ b/xen/include/asm-x86/x86_64/pda.h @@ -0,0 +1,68 @@ +#ifndef X86_64_PDA_H +#define X86_64_PDA_H + +#include + +/* Per processor datastructure. %gs points to it while the kernel runs */ +/* To use a new field with the *_pda macros it needs to be added to tools/offset.c */ +struct x8664_pda { + unsigned long kernelstack; /* TOS for current process */ + unsigned long oldrsp; /* user rsp for system call */ + unsigned long irqrsp; /* Old rsp for interrupts. */ + struct task_struct *pcurrent; /* Current process */ + int irqcount; /* Irq nesting counter. Starts with -1 */ + int cpunumber; /* Logical CPU number */ + /* XXX: could be a single list */ + unsigned long *pgd_quick; + unsigned long *pmd_quick; + unsigned long *pte_quick; + unsigned long pgtable_cache_sz; + char *irqstackptr; /* top of irqstack */ + unsigned long volatile *level4_pgt; +} ____cacheline_aligned; + +#define PDA_STACKOFFSET (5*8) + +#define IRQSTACK_ORDER 2 +#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) + +extern struct x8664_pda cpu_pda[]; + +/* + * There is no fast way to get the base address of the PDA, all the accesses + * have to mention %fs/%gs. So it needs to be done this Torvaldian way. 
+ */ +#define sizeof_field(type,field) (sizeof(((type *)0)->field)) +#define typeof_field(type,field) typeof(((type *)0)->field) + +extern void __bad_pda_field(void); +/* Don't use offsetof because it requires too much infrastructure */ +#define pda_offset(field) ((unsigned long)&((struct x8664_pda *)0)->field) + +#define pda_to_op(op,field,val) do { \ + switch (sizeof_field(struct x8664_pda, field)) { \ + case 2: asm volatile(op "w %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \ + case 4: asm volatile(op "l %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \ + case 8: asm volatile(op "q %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \ + default: __bad_pda_field(); \ + } \ + } while (0) + + +#define pda_from_op(op,field) ({ \ + typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \ + switch (sizeof_field(struct x8664_pda, field)) { \ + case 2: asm volatile(op "w %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \ + case 4: asm volatile(op "l %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \ + case 8: asm volatile(op "q %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \ + default: __bad_pda_field(); \ + } \ + ret__; }) + + +#define read_pda(field) pda_from_op("mov",field) +#define write_pda(field,val) pda_to_op("mov",field,val) +#define add_pda(field,val) pda_to_op("add",field,val) +#define sub_pda(field,val) pda_to_op("sub",field,val) + +#endif diff --git a/xen/include/asm-x86/x86_64/processor.h b/xen/include/asm-x86/x86_64/processor.h new file mode 100644 index 0000000000..ad3344cf3c --- /dev/null +++ b/xen/include/asm-x86/x86_64/processor.h @@ -0,0 +1,463 @@ +/* + * include/asm-x86_64/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_X86_64_PROCESSOR_H +#define __ASM_X86_64_PROCESSOR_H + +#include +#include +#include +#include +#include +#include + +struct task_struct; + +#define TF_MASK 0x00000100 +#define 
IF_MASK 0x00000200 +#define IOPL_MASK 0x00003000 +#define NT_MASK 0x00004000 +#define VM_MASK 0x00020000 +#define AC_MASK 0x00040000 +#define VIF_MASK 0x00080000 /* virtual interrupt flag */ +#define VIP_MASK 0x00100000 /* virtual interrupt pending */ +#define ID_MASK 0x00200000 + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; }) + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this + call */ + int x86_clflush_size; + int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ + __u8 x86_virt_bits, x86_phys_bits; + __u32 x86_power; + unsigned long loops_per_jiffy; +} ____cacheline_aligned; + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_NEXGEN 4 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_RISE 6 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ + +extern struct cpuinfo_x86 boot_cpu_data; +extern struct tss_struct init_tss[NR_CPUS]; + +#ifdef CONFIG_SMP +extern struct cpuinfo_x86 cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +#define cpu_has_pge 1 +#define cpu_has_pse 1 +#define cpu_has_pae 1 +#define cpu_has_tsc 1 +#define cpu_has_de 1 +#define cpu_has_vme 1 +#define cpu_has_fxsr 1 
+#define cpu_has_xmm 1 +#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) + +extern char ignore_irq13; + +extern void identify_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); +extern void dodgy_tsc(void); + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +/* + * Generic CPUID function + * FIXME: This really belongs to msr.h + */ +extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op)); +} + +/* + * CPUID functions returning a single datum + */ +extern inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax; + + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; +} +extern inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx; + + __asm__("cpuid" + : "=a" (eax), "=b" (ebx) + : "0" (op) + : "cx", "dx" ); + return ebx; +} +extern inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ecx; + + __asm__("cpuid" + : "=a" (eax), 
"=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; +} +extern inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; +} + + +/* + * Intel CPU flags in CR0 + */ +#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */ +#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */ +#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */ +#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */ +#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */ +#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */ +#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */ +#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */ +#define X86_CR0_PG 0x80000000 /* Paging (RW) */ + +#define read_cr0() ({ \ + unsigned long __dummy; \ + __asm__( \ + "movq %%cr0,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) + +#define write_cr0(x) \ + __asm__("movq %0,%%cr0": :"r" (x)); + + + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ + +/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up + * after us can get the correct flags. 
+ */ +extern unsigned long mmu_cr4_features; + +static inline void set_in_cr4 (unsigned long mask) +{ + mmu_cr4_features |= mask; + __asm__("movq %%cr4,%%rax\n\t" + "orq %0,%%rax\n\t" + "movq %%rax,%%cr4\n" + : : "irg" (mask) + :"ax"); +} + +static inline void clear_in_cr4 (unsigned long mask) +{ + mmu_cr4_features &= ~mask; + __asm__("movq %%cr4,%%rax\n\t" + "andq %0,%%rax\n\t" + "movq %%rax,%%cr4\n" + : : "irg" (~mask) + :"ax"); +} + +/* + * Cyrix CPU configuration register indexes + */ +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +/* + * Cyrix CPU indexed register access macros + */ + +#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + +/* + * Bus types + */ +#define EISA_bus 0 +#define MCA_bus 0 +#define MCA_bus__is_a_macro + + +/* + * User space process size: 512GB - 1GB (default). + */ +#define TASK_SIZE (0x0000007fc0000000) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_32 0xa0000000 +#define TASK_UNMAPPED_64 (TASK_SIZE/3) +#define TASK_UNMAPPED_BASE \ + ((current->thread.flags & THREAD_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64) + +/* + * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
+ */ +#define IO_BITMAP_SIZE 32 +#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 + +struct i387_fxsave_struct { + u16 cwd; + u16 swd; + u16 twd; + u16 fop; + u64 rip; + u64 rdp; + u32 mxcsr; + u32 mxcsr_mask; + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ + u32 padding[24]; +} __attribute__ ((aligned (16))); + +union i387_union { + struct i387_fxsave_struct fxsave; +}; + +typedef struct { + unsigned long seg; +} mm_segment_t; + +struct tss_struct { + unsigned short back_link,__blh; +/* u32 reserved1; */ + u64 rsp0; + u64 rsp1; + u64 rsp2; + u64 reserved2; + u64 ist[7]; + u32 reserved3; + u32 reserved4; + u16 reserved5; + u16 io_map_base; + u32 io_bitmap[IO_BITMAP_SIZE]; +} __attribute__((packed)) ____cacheline_aligned; + +struct thread_struct { + unsigned long guestos_sp; + unsigned long guestos_ss; + unsigned long rip; + unsigned long rsp; + unsigned long userrsp; /* Copy from PDA */ + unsigned long fs; + unsigned long gs; + unsigned short es, ds, fsindex, gsindex; + enum { + THREAD_IA32 = 0x0001, + } flags; +/* Hardware debugging registers */ + unsigned long debugreg[8]; /* %%db0-7 debug registers */ +/* floating point info */ + union i387_union i387; +/* Trap info. */ + trap_info_t traps[256]; +}; + +#define IDT_ENTRIES 256 +extern struct gate_struct idt_table[]; +extern struct gate_struct *idt_tables[]; + +#define INIT_THREAD { \ + 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 0, /* flags */ \ + { [0 ... 7] = 0 }, /* debugging registers */ \ + { { 0, }, }, /* 387 state */ \ + { {0} } /* io permissions */ \ +} + +#define INIT_TSS { \ + 0,0, /* back_link, __blh */ \ + 0, /* rsp0 */ \ + 0, 0, /* rsp1, rsp2 */ \ + 0, /* reserved */ \ + { [0 ... 
6] = 0 }, /* ist[] */ \ + 0,0, /* reserved */ \ + 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \ + {~0, } /* ioperm */ \ +} + +struct mm_struct { + /* + * Every domain has a L1 pagetable of its own. Per-domain mappings + * are put in this table (eg. the current GDT is mapped here). + */ + l1_pgentry_t *perdomain_pt; + pagetable_t pagetable; + /* Current LDT details. */ + unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt; + /* Next entry is passed to LGDT on domain switch. */ + char gdt[10]; +}; + +#define IDLE0_MM \ +{ \ + perdomain_pt: 0, \ + pagetable: mk_pagetable(__pa(idle_pg_table)) \ +} + +/* Convenient accessor for mm.gdt. */ +#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e)) +#define SET_GDT_ADDRESS(_p, _a) ((*(u64 *)((_p)->mm.gdt + 2)) = (_a)) +#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0))) +#define GET_GDT_ADDRESS(_p) ((*(u64 *)((_p)->mm.gdt + 2))) + +long set_gdt(struct task_struct *p, + unsigned long *frames, + unsigned int entries); + +long set_debugreg(struct task_struct *p, int reg, unsigned long value); + +struct microcode { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int reserved[5]; + unsigned int bits[500]; +}; + +/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ +#define MICROCODE_IOCFREE _IO('6',0) + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. 
*/ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop"); +} + +#define cpu_relax() rep_nop() + +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) + +/* Avoid speculative execution by the CPU */ +extern inline void sync_core(void) +{ + int tmp; + asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); +} + +#define cpu_has_fpu 1 + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#define prefetch(x) __builtin_prefetch((x),0) +#define prefetchw(x) __builtin_prefetch((x),1) +#define spin_lock_prefetch(x) prefetchw(x) +#define cpu_relax() rep_nop() + + +#endif /* __ASM_X86_64_PROCESSOR_H */ diff --git a/xen/include/asm-x86/x86_64/ptrace.h b/xen/include/asm-x86/x86_64/ptrace.h new file mode 100644 index 0000000000..da0419f429 --- /dev/null +++ b/xen/include/asm-x86/x86_64/ptrace.h @@ -0,0 +1,114 @@ +#ifndef _X86_64_PTRACE_H +#define _X86_64_PTRACE_H + +#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 36 +#define RBX 40 +/* arguments: interrupts/non tracing syscalls only save upto here*/ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* = ERROR */ +/* end of arguments */ +/* cpu exception frame or undefined in case of fast syscall. 
*/ +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 +#define ARGOFFSET R11 +#endif /* __ASSEMBLY__ */ + +/* top of stack page */ +#define FRAME_SIZE 168 + +#define PTRACE_SETOPTIONS 21 + +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 0x00000001 + +/* Dummy values for ptrace */ +#define FS 1000 +#define GS 1008 + +#ifndef __ASSEMBLY__ + +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; +/* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; +/* top of stack page */ +}; + +#endif + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#define user_mode(regs) (!!((regs)->cs & 3)) +#define instruction_pointer(regs) ((regs)->rip) +extern void show_regs(struct pt_regs *); + +enum { + EF_CF = 0x00000001, + EF_PF = 0x00000004, + EF_AF = 0x00000010, + EF_ZF = 0x00000040, + EF_SF = 0x00000080, + EF_TF = 0x00000100, + EF_IE = 0x00000200, + EF_DF = 0x00000400, + EF_OF = 0x00000800, + EF_IOPL = 0x00003000, + EF_IOPL_RING0 = 0x00000000, + EF_IOPL_RING1 = 0x00001000, + EF_IOPL_RING2 = 0x00002000, + EF_NT = 0x00004000, /* nested task */ + EF_RF = 0x00010000, /* resume */ + EF_VM = 0x00020000, /* virtual mode */ + EF_AC = 0x00040000, /* alignment */ + EF_VIF = 0x00080000, /* virtual interrupt */ + EF_VIP = 0x00100000, /* virtual interrupt pending */ + EF_ID = 0x00200000, /* id */ +}; + +#endif + +#endif diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h new file mode 100644 index 0000000000..952e1b2f0a --- /dev/null +++ b/xen/include/asm-x86/x86_64/uaccess.h @@ -0,0 +1,314 @@ +#ifndef __X86_64_UACCESS_H +#define __X86_64_UACCESS_H + +/* + * User space memory access functions + */ +#include +#include +#include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. 
+ */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->addr_limit) +#define set_fs(x) (current->addr_limit = (x)) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +#define __addr_ok(addr) (!((unsigned long)(addr) & (current->addr_limit.seg))) + +/* + * Uhhuh, this needs 65-bit arithmetic. We have a carry.. + */ +#define __range_not_ok(addr,size) ({ \ + unsigned long flag,sum; \ + asm("# range_ok\n\r" \ + "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ + :"=&r" (flag), "=r" (sum) \ + :"1" (addr),"g" ((long)(size)),"g" (current->addr_limit.seg)); \ + flag; }) + +#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0) + +extern inline int verify_area(int type, const void * addr, unsigned long size) +{ + return access_ok(type,addr,size) ? 0 : -EFAULT; +} + + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the ugliness from the user. 
+ * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). + */ + +extern void __get_user_1(void); +extern void __get_user_2(void); +extern void __get_user_4(void); +extern void __get_user_8(void); + +#define __get_user_x(size,ret,x,ptr) \ + __asm__ __volatile__("call __get_user_" #size \ + :"=a" (ret),"=d" (x) \ + :"0" (ptr) \ + :"rbx") + +/* Careful: we have to cast the result to the type of the pointer for sign reasons */ +#define get_user(x,ptr) \ +({ long __val_gu; \ + int __ret_gu=1; \ + switch(sizeof (*(ptr))) { \ ++ case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1);break; \ ++ case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2);break; \ ++ case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4);break; \ ++ case 8: __ret_gu=copy_from_user(&__val_gu,ptr,8);break; \ ++ default: __ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\ + /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ + /*case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break;*/ \ + /*default: __get_user_bad(); break;*/ \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) + +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); + +extern void __put_user_bad(void); + +#define __put_user_x(size,ret,x,ptr) \ + __asm__ __volatile__("call __put_user_" #size \ + :"=a" (ret) \ + :"0" (ptr),"d" (x) \ + :"rbx") + +#define put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) +#define __put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define 
__put_user_nocheck(x,ptr,size) \ +({ \ + int __pu_err; \ + __put_user_size((x),(ptr),(size),__pu_err); \ + __pu_err; \ +}) + + +#define __put_user_check(x,ptr,size) \ +({ \ + int __pu_err = -EFAULT; \ + __typeof__(*(ptr)) *__pu_addr = (ptr); \ + if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ + __put_user_size((x),__pu_addr,(size),__pu_err); \ + __pu_err; \ +}) + +#define __put_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\ + case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\ + case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\ + case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\ + default: __put_user_bad(); \ + } \ +} while (0) + +/* FIXME: this hack is definitely wrong -AK */ +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. 
+ */ +#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err)) + + +#define __get_user_nocheck(x,ptr,size) \ +({ \ + int __gu_err; \ + long __gu_val; \ + __get_user_size(__gu_val,(ptr),(size),__gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +extern int __get_user_bad(void); + +#define __get_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\ + case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\ + case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\ + case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\ + default: (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \ + __asm__ __volatile__( \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad 1b,3b\n" \ + ".previous" \ + : "=r"(err), ltype (x) \ + : "m"(__m(addr)), "i"(errno), "0"(err)) + +/* + * Copy To/From Userspace + */ + +/* Handles exceptions in both to and from, but doesn't do access_ok */ +extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); + +extern unsigned long copy_to_user(void *to, const void *from, unsigned len); +extern unsigned long copy_from_user(void *to, const void *from, unsigned len); + +static inline int __copy_from_user(void *dst, const void *src, unsigned size) +{ + if (!__builtin_constant_p(size)) + return copy_user_generic(dst,src,size); + int ret 
= 0; + switch (size) { + case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1); + return ret; + case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2); + return ret; + case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4); + return ret; + case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8); + return ret; + case 10: + __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); + if (ret) return ret; + __get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2); + return ret; + case 16: + __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); + if (ret) return ret; + __get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8); + return ret; + default: + return copy_user_generic(dst,src,size); + } +} + +static inline int __copy_to_user(void *dst, const void *src, unsigned size) +{ + if (!__builtin_constant_p(size)) + return copy_user_generic(dst,src,size); + int ret = 0; + switch (size) { + case 1:__put_user_asm(*(u8*)src,(u8 *)dst,ret,"b","b","iq",1); + return ret; + case 2:__put_user_asm(*(u16*)src,(u16*)dst,ret,"w","w","ir",2); + return ret; + case 4:__put_user_asm(*(u32*)src,(u32*)dst,ret,"l","k","ir",4); + return ret; + case 8:__put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",8); + return ret; + case 10: + __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",10); + if (ret) return ret; + asm("":::"memory"); + __put_user_asm(4[(u16*)src],4+(u16*)dst,ret,"w","w","ir",2); + return ret; + case 16: + __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",16); + if (ret) return ret; + asm("":::"memory"); + __put_user_asm(1[(u64*)src],1+(u64*)dst,ret,"q","","ir",8); + return ret; + default: + return copy_user_generic(dst,src,size); + } +} + +long strncpy_from_user(char *dst, const char *src, long count); +long __strncpy_from_user(char *dst, const char *src, long count); +long strnlen_user(const char *str, long n); +long strlen_user(const char *str); +unsigned long clear_user(void *mem, unsigned long len); +unsigned long 
__clear_user(void *mem, unsigned long len); + +extern unsigned long search_exception_table(unsigned long); + +#endif /* __X86_64_UACCESS_H */ diff --git a/xen/include/asm-x86_64/apic.h b/xen/include/asm-x86_64/apic.h deleted file mode 100644 index 9044692e3b..0000000000 --- a/xen/include/asm-x86_64/apic.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef __ASM_APIC_H -#define __ASM_APIC_H - -#include -#include -#include -#include - -#ifdef CONFIG_X86_LOCAL_APIC - -#define APIC_DEBUG 0 - -#if APIC_DEBUG -#define Dprintk(x...) printk(x) -#else -#define Dprintk(x...) -#endif - -/* - * Basic functions accessing APICs. - */ - -static __inline void apic_write(unsigned long reg, unsigned int v) -{ - *((volatile unsigned int *)(APIC_BASE+reg)) = v; - barrier(); -} - -static __inline void apic_write_atomic(unsigned long reg, unsigned int v) -{ - xchg((volatile unsigned int *)(APIC_BASE+reg), v); -} - -static __inline unsigned int apic_read(unsigned long reg) -{ - return *((volatile unsigned int *)(APIC_BASE+reg)); -} - -static __inline__ void apic_wait_icr_idle(void) -{ - do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); -} - -#ifdef CONFIG_X86_GOOD_APIC -# define FORCE_READ_AROUND_WRITE 0 -# define apic_read_around(x) -# define apic_write_around(x,y) apic_write((x),(y)) -#else -# define FORCE_READ_AROUND_WRITE 1 -# define apic_read_around(x) apic_read(x) -# define apic_write_around(x,y) apic_write_atomic((x),(y)) -#endif - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction: - * - a single rmw on Pentium/82489DX - * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) - * ... yummie. 
- */ - - /* Docs say use 0 for future compatibility */ - apic_write_around(APIC_EOI, 0); -} - -extern int get_maxlvt(void); -extern void connect_bsp_APIC (void); -extern void disconnect_bsp_APIC (void); -extern void disable_local_APIC (void); -extern int verify_local_APIC (void); -extern void sync_Arb_IDs (void); -extern void init_bsp_APIC (void); -extern void setup_local_APIC (void); -extern void init_apic_mappings (void); -extern void setup_APIC_clocks (void); -extern void setup_apic_nmi_watchdog (void); -extern inline void nmi_watchdog_tick (struct pt_regs * regs); -extern int APIC_init_uniprocessor (void); -extern void disable_APIC_timer(void); -extern void enable_APIC_timer(void); - -//extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback); -//extern void apic_pm_unregister(struct pm_dev*); - -extern unsigned int watchdog_on; - -extern unsigned int apic_timer_irqs [NR_CPUS]; -extern int check_nmi_watchdog (void); - -extern unsigned int nmi_watchdog; -#define NMI_NONE 0 -#define NMI_IO_APIC 1 -#define NMI_LOCAL_APIC 2 -#define NMI_INVALID 3 - -#endif /* CONFIG_X86_LOCAL_APIC */ - -#define clustered_apic_mode 0 -#define esr_disable 0 - -#endif /* __ASM_APIC_H */ diff --git a/xen/include/asm-x86_64/apicdef.h b/xen/include/asm-x86_64/apicdef.h deleted file mode 100644 index 8a787c3122..0000000000 --- a/xen/include/asm-x86_64/apicdef.h +++ /dev/null @@ -1,363 +0,0 @@ -#ifndef __ASM_APICDEF_H -#define __ASM_APICDEF_H - -/* - * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) - * - * Alan Cox , 1995. 
- * Ingo Molnar , 1999, 2000 - */ - -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 - -#define APIC_ID 0x20 -#define APIC_ID_MASK (0x0F<<24) -#define GET_APIC_ID(x) (((x)>>24)&0x0F) -#define APIC_LVR 0x30 -#define APIC_LVR_MASK 0xFF00FF -#define GET_APIC_VERSION(x) ((x)&0xFF) -#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) -#define APIC_INTEGRATED(x) ((x)&0xF0) -#define APIC_TASKPRI 0x80 -#define APIC_TPRI_MASK 0xFF -#define APIC_ARBPRI 0x90 -#define APIC_ARBPRI_MASK 0xFF -#define APIC_PROCPRI 0xA0 -#define APIC_EOI 0xB0 -#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ -#define APIC_RRR 0xC0 -#define APIC_LDR 0xD0 -#define APIC_LDR_MASK (0xFF<<24) -#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) -#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) -#define APIC_ALL_CPUS 0xFF -#define APIC_DFR 0xE0 -#define APIC_SPIV 0xF0 -#define APIC_SPIV_FOCUS_DISABLED (1<<9) -#define APIC_SPIV_APIC_ENABLED (1<<8) -#define APIC_ISR 0x100 -#define APIC_TMR 0x180 -#define APIC_IRR 0x200 -#define APIC_ESR 0x280 -#define APIC_ESR_SEND_CS 0x00001 -#define APIC_ESR_RECV_CS 0x00002 -#define APIC_ESR_SEND_ACC 0x00004 -#define APIC_ESR_RECV_ACC 0x00008 -#define APIC_ESR_SENDILL 0x00020 -#define APIC_ESR_RECVILL 0x00040 -#define APIC_ESR_ILLREGA 0x00080 -#define APIC_ICR 0x300 -#define APIC_DEST_SELF 0x40000 -#define APIC_DEST_ALLINC 0x80000 -#define APIC_DEST_ALLBUT 0xC0000 -#define APIC_ICR_RR_MASK 0x30000 -#define APIC_ICR_RR_INVALID 0x00000 -#define APIC_ICR_RR_INPROG 0x10000 -#define APIC_ICR_RR_VALID 0x20000 -#define APIC_INT_LEVELTRIG 0x08000 -#define APIC_INT_ASSERT 0x04000 -#define APIC_ICR_BUSY 0x01000 -#define APIC_DEST_LOGICAL 0x00800 -#define APIC_DM_FIXED 0x00000 -#define APIC_DM_LOWEST 0x00100 -#define APIC_DM_SMI 0x00200 -#define APIC_DM_REMRD 0x00300 -#define APIC_DM_NMI 0x00400 -#define APIC_DM_INIT 0x00500 -#define APIC_DM_STARTUP 0x00600 -#define APIC_DM_EXTINT 0x00700 -#define APIC_VECTOR_MASK 0x000FF -#define APIC_ICR2 0x310 -#define GET_APIC_DEST_FIELD(x) 
(((x)>>24)&0xFF) -#define SET_APIC_DEST_FIELD(x) ((x)<<24) -#define APIC_LVTT 0x320 -#define APIC_LVTPC 0x340 -#define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) -#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) -#define SET_APIC_TIMER_BASE(x) (((x)<<18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 -#define APIC_LVT_TIMER_PERIODIC (1<<17) -#define APIC_LVT_MASKED (1<<16) -#define APIC_LVT_LEVEL_TRIGGER (1<<15) -#define APIC_LVT_REMOTE_IRR (1<<14) -#define APIC_INPUT_POLARITY (1<<13) -#define APIC_SEND_PENDING (1<<12) -#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) -#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) -#define APIC_MODE_FIXED 0x0 -#define APIC_MODE_NMI 0x4 -#define APIC_MODE_EXINT 0x7 -#define APIC_LVT1 0x360 -#define APIC_LVTERR 0x370 -#define APIC_TMICT 0x380 -#define APIC_TMCCT 0x390 -#define APIC_TDCR 0x3E0 -#define APIC_TDR_DIV_TMBASE (1<<2) -#define APIC_TDR_DIV_1 0xB -#define APIC_TDR_DIV_2 0x0 -#define APIC_TDR_DIV_4 0x1 -#define APIC_TDR_DIV_8 0x2 -#define APIC_TDR_DIV_16 0x3 -#define APIC_TDR_DIV_32 0x8 -#define APIC_TDR_DIV_64 0x9 -#define APIC_TDR_DIV_128 0xA - -#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) - -#define MAX_IO_APICS 16 - -/* - * the local APIC register structure, memory mapped. Not terribly well - * tested, but we might eventually use this one in the future - the - * problem why we cannot use it right now is the P5 APIC, it has an - * errata which cannot take 8-bit reads and writes, only 32-bit ones ... 
- */ -#define u32 unsigned int - -#define lapic ((volatile struct local_apic *)APIC_BASE) - -struct local_apic { - -/*000*/ struct { u32 __reserved[4]; } __reserved_01; - -/*010*/ struct { u32 __reserved[4]; } __reserved_02; - -/*020*/ struct { /* APIC ID Register */ - u32 __reserved_1 : 24, - phys_apic_id : 4, - __reserved_2 : 4; - u32 __reserved[3]; - } id; - -/*030*/ const - struct { /* APIC Version Register */ - u32 version : 8, - __reserved_1 : 8, - max_lvt : 8, - __reserved_2 : 8; - u32 __reserved[3]; - } version; - -/*040*/ struct { u32 __reserved[4]; } __reserved_03; - -/*050*/ struct { u32 __reserved[4]; } __reserved_04; - -/*060*/ struct { u32 __reserved[4]; } __reserved_05; - -/*070*/ struct { u32 __reserved[4]; } __reserved_06; - -/*080*/ struct { /* Task Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } tpr; - -/*090*/ const - struct { /* Arbitration Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } apr; - -/*0A0*/ const - struct { /* Processor Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } ppr; - -/*0B0*/ struct { /* End Of Interrupt Register */ - u32 eoi; - u32 __reserved[3]; - } eoi; - -/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; - -/*0D0*/ struct { /* Logical Destination Register */ - u32 __reserved_1 : 24, - logical_dest : 8; - u32 __reserved_2[3]; - } ldr; - -/*0E0*/ struct { /* Destination Format Register */ - u32 __reserved_1 : 28, - model : 4; - u32 __reserved_2[3]; - } dfr; - -/*0F0*/ struct { /* Spurious Interrupt Vector Register */ - u32 spurious_vector : 8, - apic_enabled : 1, - focus_cpu : 1, - __reserved_2 : 22; - u32 __reserved_3[3]; - } svr; - -/*100*/ struct { /* In Service Register */ -/*170*/ u32 bitfield; - u32 __reserved[3]; - } isr [8]; - -/*180*/ struct { /* Trigger Mode Register */ -/*1F0*/ u32 bitfield; - u32 __reserved[3]; - } tmr [8]; - -/*200*/ struct { /* Interrupt Request Register */ -/*270*/ 
u32 bitfield; - u32 __reserved[3]; - } irr [8]; - -/*280*/ union { /* Error Status Register */ - struct { - u32 send_cs_error : 1, - receive_cs_error : 1, - send_accept_error : 1, - receive_accept_error : 1, - __reserved_1 : 1, - send_illegal_vector : 1, - receive_illegal_vector : 1, - illegal_register_address : 1, - __reserved_2 : 24; - u32 __reserved_3[3]; - } error_bits; - struct { - u32 errors; - u32 __reserved_3[3]; - } all_errors; - } esr; - -/*290*/ struct { u32 __reserved[4]; } __reserved_08; - -/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; - -/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; - -/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; - -/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; - -/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; - -/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; - -/*300*/ struct { /* Interrupt Command Register 1 */ - u32 vector : 8, - delivery_mode : 3, - destination_mode : 1, - delivery_status : 1, - __reserved_1 : 1, - level : 1, - trigger : 1, - __reserved_2 : 2, - shorthand : 2, - __reserved_3 : 12; - u32 __reserved_4[3]; - } icr1; - -/*310*/ struct { /* Interrupt Command Register 2 */ - union { - u32 __reserved_1 : 24, - phys_dest : 4, - __reserved_2 : 4; - u32 __reserved_3 : 24, - logical_dest : 8; - } dest; - u32 __reserved_4[3]; - } icr2; - -/*320*/ struct { /* LVT - Timer */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - timer_mode : 1, - __reserved_3 : 14; - u32 __reserved_4[3]; - } lvt_timer; - -/*330*/ struct { u32 __reserved[4]; } __reserved_15; - -/*340*/ struct { /* LVT - Performance Counter */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_pc; - -/*350*/ struct { /* LVT - LINT0 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask 
: 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint0; - -/*360*/ struct { /* LVT - LINT1 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint1; - -/*370*/ struct { /* LVT - Error */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_error; - -/*380*/ struct { /* Timer Initial Count Register */ - u32 initial_count; - u32 __reserved_2[3]; - } timer_icr; - -/*390*/ const - struct { /* Timer Current Count Register */ - u32 curr_count; - u32 __reserved_2[3]; - } timer_ccr; - -/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; - -/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; - -/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; - -/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; - -/*3E0*/ struct { /* Timer Divide Configuration Register */ - u32 divisor : 4, - __reserved_1 : 28; - u32 __reserved_2[3]; - } timer_dcr; - -/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; - -} __attribute__ ((packed)); - -#undef u32 - -#endif diff --git a/xen/include/asm-x86_64/atomic.h b/xen/include/asm-x86_64/atomic.h deleted file mode 100644 index 1f5dc5085d..0000000000 --- a/xen/include/asm-x86_64/atomic.h +++ /dev/null @@ -1,205 +0,0 @@ -#ifndef __ARCH_X86_64_ATOMIC__ -#define __ARCH_X86_64_ATOMIC__ - -#include - -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - -#ifdef CONFIG_SMP -#define LOCK "lock ; " -#else -#define LOCK "" -#endif - -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. 
- */ -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } - -/** - * atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -#define atomic_read(v) ((v)->counter) - -/** - * atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -#define atomic_set(v,i) (((v)->counter) = (i)) - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. Note that the guaranteed useful range - * of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_add(int i, atomic_t *v) -{ - __asm__ __volatile__( - LOCK "addl %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); -} - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_sub(int i, atomic_t *v) -{ - __asm__ __volatile__( - LOCK "subl %1,%0" - :"=m" (v->counter) - :"ir" (i), "m" (v->counter)); -} - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. 
- */ -static __inline__ int atomic_sub_and_test(int i, atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "subl %2,%0; sete %1" - :"=m" (v->counter), "=qm" (c) - :"ir" (i), "m" (v->counter) : "memory"); - return c; -} - -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_inc(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "incl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - -/** - * atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_dec(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "decl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ int atomic_dec_and_test(atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "decl %0; sete %1" - :"=m" (v->counter), "=qm" (c) - :"m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. 
- */ -static __inline__ int atomic_inc_and_test(atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "incl %0; sete %1" - :"=m" (v->counter), "=qm" (c) - :"m" (v->counter) : "memory"); - return c != 0; -} - -/** - * atomic_add_negative - add and test if negative - * @v: pointer of type atomic_t - * @i: integer value to add - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ int atomic_add_negative(int i, atomic_t *v) -{ - unsigned char c; - - __asm__ __volatile__( - LOCK "addl %2,%0; sets %1" - :"=m" (v->counter), "=qm" (c) - :"ir" (i), "m" (v->counter) : "memory"); - return c; -} - - -/* These are x86-specific, used by some header files */ -#define atomic_clear_mask(mask, addr) \ -__asm__ __volatile__(LOCK "andl %0,%1" \ -: : "r" (~(mask)),"m" (*addr) : "memory") - -#define atomic_set_mask(mask, addr) \ -__asm__ __volatile__(LOCK "orl %0,%1" \ -: : "r" ((unsigned)mask),"m" (*addr) : "memory") - -/* Atomic operations are already serializing on x86 */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() - -#endif /* __ARCH_X86_64_ATOMIC__ */ diff --git a/xen/include/asm-x86_64/bitops.h b/xen/include/asm-x86_64/bitops.h deleted file mode 100644 index 45209b11bd..0000000000 --- a/xen/include/asm-x86_64/bitops.h +++ /dev/null @@ -1,405 +0,0 @@ -#ifndef _X86_64_BITOPS_H -#define _X86_64_BITOPS_H - -/* - * Copyright 1992, Linus Torvalds. - */ - -#include - -/* - * These have to be done with inline assembly: that way the bit-setting - * is guaranteed to be atomic. All bit operations return 0 if the bit - * was cleared before the operation and != 0 if it was not. - * - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
- */ - -#ifdef CONFIG_SMP -#define LOCK_PREFIX "lock ; " -#else -#define LOCK_PREFIX "" -#endif - -#define ADDR (*(volatile long *) addr) - -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __inline__ void set_bit(long nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btsq %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); -} - -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __inline__ void __set_bit(long nr, volatile void * addr) -{ - __asm__( - "btsq %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); -} - -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() - * in order to ensure changes are visible on other processors. - */ -static __inline__ void clear_bit(long nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btrq %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); -} -#define smp_mb__before_clear_bit() barrier() -#define smp_mb__after_clear_bit() barrier() - -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. 
- * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __inline__ void __change_bit(long nr, volatile void * addr) -{ - __asm__ __volatile__( - "btcq %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); -} - -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __inline__ void change_bit(long nr, volatile void * addr) -{ - __asm__ __volatile__( LOCK_PREFIX - "btcq %1,%0" - :"=m" (ADDR) - :"dIr" (nr)); -} - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __inline__ int test_and_set_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btsq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr) : "memory"); - return oldbit; -} - -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __inline__ int __test_and_set_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__( - "btsq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr)); - return oldbit; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. 
- */ -static __inline__ int test_and_clear_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btrq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr) : "memory"); - return oldbit; -} - -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __inline__ int __test_and_clear_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__( - "btrq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr)); - return oldbit; -} - -/* WARNING: non atomic and it can be reordered! */ -static __inline__ int __test_and_change_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__ __volatile__( - "btcq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr) : "memory"); - return oldbit; -} - -/** - * test_and_change_bit - Change a bit and return its new value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. 
- */ -static __inline__ int test_and_change_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btcq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"dIr" (nr) : "memory"); - return oldbit; -} - -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static int test_bit(int nr, const volatile void * addr); -#endif - -static __inline__ int constant_test_bit(long nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int variable_test_bit(long nr, volatile void * addr) -{ - long oldbit; - - __asm__ __volatile__( - "btq %2,%1\n\tsbbq %0,%0" - :"=r" (oldbit) - :"m" (ADDR),"dIr" (nr)); - return oldbit; -} - -#define test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? \ - constant_test_bit((nr),(addr)) : \ - variable_test_bit((nr),(addr))) - -/** - * find_first_zero_bit - find the first zero bit in a memory region - * @addr: The address to start the search at - * @size: The maximum bitnumber to search - * - * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. -1 when none found. 
- */ -static __inline__ int find_first_zero_bit(void * addr, unsigned size) -{ - int d0, d1, d2; - int res; - - if (!size) - return 0; - __asm__ __volatile__( - "movl $-1,%%eax\n\t" - "xorl %%edx,%%edx\n\t" - "repe; scasl\n\t" - "je 1f\n\t" - "xorl -4(%%rdi),%%eax\n\t" - "subq $4,%%rdi\n\t" - "bsfl %%eax,%%edx\n" - "1:\tsubq %%rbx,%%rdi\n\t" - "shlq $3,%%rdi\n\t" - "addq %%rdi,%%rdx" - :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory"); - return res; -} - -/** - * find_next_zero_bit - find the first zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -static __inline__ int find_next_zero_bit (void * addr, int size, int offset) -{ - unsigned int * p = ((unsigned int *) addr) + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for zero in first byte - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (~(*p >> bit))); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No zero yet, search remaining full bytes for a zero - */ - res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr)); - return (offset + set + res); -} - -/* - * Find string of zero bits in a bitmap. -1 when not found. - */ -extern unsigned long -find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len); - -static inline void set_bit_string(unsigned long *bitmap, unsigned long i, - int len) -{ - unsigned long end = i + len; - while (i < end) { - __set_bit(i, bitmap); - i++; - } -} - -static inline void clear_bit_string(unsigned long *bitmap, unsigned long i, - int len) -{ - unsigned long end = i + len; - while (i < end) { - clear_bit(i, bitmap); - i++; - } -} - -/** - * ffz - find first zero in word. 
- * @word: The word to search - * - * Undefined if no zero exists, so code should check against ~0UL first. - */ -static __inline__ unsigned long ffz(unsigned long word) -{ - __asm__("bsfq %1,%0" - :"=r" (word) - :"r" (~word)); - return word; -} - - -/** - * ffs - find first bit set - * @x: the word to search - * - * This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ -static __inline__ int ffs(int x) -{ - int r; - - __asm__("bsfl %1,%0\n\t" - "jnz 1f\n\t" - "movl $-1,%0\n" - "1:" : "=r" (r) : "g" (x)); - return r+1; -} - -/** - * hweightN - returns the hamming weight of a N-bit word - * @x: the word to weigh - * - * The Hamming Weight of a number is the total number of bits set in it. - */ - -#define hweight32(x) generic_hweight32(x) -#define hweight16(x) generic_hweight16(x) -#define hweight8(x) generic_hweight8(x) - - - -#define ext2_set_bit __test_and_set_bit -#define ext2_clear_bit __test_and_clear_bit -#define ext2_test_bit test_bit -#define ext2_find_first_zero_bit find_first_zero_bit -#define ext2_find_next_zero_bit find_next_zero_bit - -/* Bitmap functions for the minix filesystem. 
*/ -#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) __set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) -#define minix_test_bit(nr,addr) test_bit(nr,addr) -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) - - -#endif /* _X86_64_BITOPS_H */ diff --git a/xen/include/asm-x86_64/cache.h b/xen/include/asm-x86_64/cache.h deleted file mode 100644 index a1d7349a81..0000000000 --- a/xen/include/asm-x86_64/cache.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * include/asm-x8664/cache.h - */ -#ifndef __ARCH_X8664_CACHE_H -#define __ARCH_X8664_CACHE_H - -#include - -/* L1 cache line size */ -#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - -#endif diff --git a/xen/include/asm-x86_64/config.h b/xen/include/asm-x86_64/config.h deleted file mode 100644 index 5a0acabf2a..0000000000 --- a/xen/include/asm-x86_64/config.h +++ /dev/null @@ -1,292 +0,0 @@ -/****************************************************************************** - * config.h - * - * A Linux-style configuration list. 
- * - */ - -#ifndef __XEN_X86_64_CONFIG_H__ -#define __XEN_X86_64_CONFIG_H__ - -#define CONFIG_X86 1 -#define CONFIG_X86_64BITMODE 1 - -#define CONFIG_SMP 1 -#define CONFIG_X86_LOCAL_APIC 1 -#define CONFIG_X86_IO_APIC 1 -#define CONFIG_X86_L1_CACHE_SHIFT 5 - -#define CONFIG_PCI 1 -#define CONFIG_PCI_BIOS 1 -#define CONFIG_PCI_DIRECT 1 - -#define CONFIG_IDE 1 -#define CONFIG_BLK_DEV_IDE 1 -#define CONFIG_BLK_DEV_IDEDMA 1 -#define CONFIG_BLK_DEV_IDEPCI 1 -#define CONFIG_IDEDISK_MULTI_MODE 1 -#define CONFIG_IDEDISK_STROKE 1 -#define CONFIG_IDEPCI_SHARE_IRQ 1 -#define CONFIG_BLK_DEV_IDEDMA_PCI 1 -#define CONFIG_IDEDMA_PCI_AUTO 1 -#define CONFIG_IDEDMA_AUTO 1 -#define CONFIG_IDEDMA_ONLYDISK 1 -#define CONFIG_BLK_DEV_IDE_MODES 1 -#define CONFIG_BLK_DEV_PIIX 1 - -#define CONFIG_SCSI 1 -#define CONFIG_SCSI_LOGGING 1 -#define CONFIG_BLK_DEV_SD 1 -#define CONFIG_SD_EXTRA_DEVS 40 -#define CONFIG_SCSI_MULTI_LUN 1 - -#define CONFIG_XEN_ATTENTION_KEY 1 - -#define HZ 100 - -/* - * Just to keep compiler happy. - * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!! - * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-) - * Mmmm... so niiiiiice.... 
- */ -#define SMP_CACHE_BYTES 64 -#define NR_CPUS 16 -#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#define ____cacheline_aligned __cacheline_aligned - -#define PHYSICAL_ADDRESS_BITS 52 -#define MAX_PHYSICAL_ADDRESS (1 << PHYSICAL_ADDRESS_BITS) -#define VIRTUAL_ADDRESS_BITS 48 -#define XEN_PAGE_SIZE 4096 - -#define PTE_SIZE 8 -#define TOTAL_PTES (512ULL * 512 * 512 * 512) - -/* next PML4 from an _END address */ -#define PML4_BITS 39 -#define PML4_SPACE (1ULL << PML4_BITS) - -/* - * Memory layout - * - * 0x0000000000000000 - 0x00007fffffffffff Guest & user apps (128TB) - * (Only for 32-bit guests) - * 0x00000000fc000000 - 0x00000000fc3fffff Machine/Physical 32-bit shadow (4MB) - * 0x00000000fc400000 - 0x00000000feffffff IO remap for 32-bit guests (44MB) - * 0x00000000ff000000 - 0x00000000ff3fffff 32-bit PTE shadow (4MB) - * - * 0xffff800000000000 - 0xffff807fffffffff Linear page table (512GB) - * 0xffff808000000000 - 0xffff80ffffffffff Reserved for shadow page table (512GB) - * - * 0xffff810000000000 - 0xffff82ffffffffff Xen PML4 slots - * 0xffff810000000000 - 0xffff81003fffffff Xen hypervisor virtual space (1GB) - * 0xffff810040000000 - 0xffff81807fffffff Per-domain mappings (1GB) - * 0xffff810080000000 - 0xffff81387fffffff R/O physical map (224GB) - * 0xffff813880000000 - 0xffff81707fffffff R/W physical map (224GB) - * 0xffff817080000000 - 0xffff82c07fffffff Frame table (1344GB) - * 0xffff82c080000000 - 0xffff82c0bfffffff I/O remap space (1GB) - * 0xffff82c0c0000000 - 0xffff82ffffffffff (253GB) - * - * 0xffff830000000000 - 0xffff87ffffffffff RESERVED (5TB) - * - * 0xffff880000000000 - ... Physical 1:1 direct mapping (112TB max) - * 0xffff880000000000 - 0xffff880001000000 Low memory DMA region (16M) - * - * 0xfffff80000000000 - 0xffffffffffffffff Reserved for guest (8TB) - * - * The requirement that we have a 1:1 map of physical memory limits - * the maximum memory size we can support. 
With only 48 virtual address - * bits, and the assumption that guests will run users in positive address - * space, a contiguous 1:1 map can only live in the negative address space. - * Since we don't want to bump guests out of the very top of memory and - * force relocation, we can't use this entire space, and Xen has several - * heavy mapping that require PML4 slices. Just to be safe, we reserve - * 16 PML4s each for Xen and the guest. 224 PML4s give us 112 terabytes - * of addressable memory. Any high device physical addresses beyond this - * region can be mapped into the IO remap space or some of the reserved - * 6TB region. - * - * 112 TB is just 16 TB shy of the maximum physical memory supported - * on Linux 2.6.0, and should be enough for anybody. - * - * There are some additional constraints in the memory layout that require - * several changes from the i386 architecture. - * - * ACPI data and ACPI non-volatile storage must be placed in some region - * of memory below the 4GB mark. Depending on the BIOS and system, we - * may have this located as low as 1GB. This means allocating large - * chunks of physically contiguous memory from the direct mapping may not - * be possible. - * - * The full frame table for 112TB of physical memory currently occupies - * 1344GB space. This clearly can not be allocated in physically contiguous - * space, so it must be moved to a virtual address. - * - * Both copies of the machine->physical table must also be relocated. - * (112 TB / 4k) * 8 bytes means that each copy of the physical map requires - * 224GB of space, thus it also must move to VM space. - * - * The physical pages used to allocate the page tables for the direct 1:1 - * map may occupy (112TB / 2M) * 8 bytes = 448MB. This is almost guaranteed - * to fit in contiguous physical memory, but these pages used to be allocated - * in the Xen monitor address space. 
This means the Xen address space must - * accomodate up to ~500 MB, which means it also must move out of the - * direct mapped region. - * - * Since both copies of the MPT, the frame table, and Xen now exist in - * purely virtual space, we have the added advantage of being able to - * map them to local pages on NUMA machines, or use NUMA aware memory - * allocation within Xen itself. - * - * Additionally, the 1:1 page table now exists contiguously in virtual - * space, but may be mapped to physically separated pages, allowing - * each node to contain the page tables for its own local memory. Setting - * up this mapping presents a bit of a chicken-egg problem, but is possible - * as a future enhancement. - * - * Zachary Amsden (zamsden@cisco.com) - * - */ - -/* Guest and user space */ -#define NSPACE_VIRT_START 0 -#define NSPACE_VIRT_END (1ULL << (VIRTUAL_ADDRESS_BITS - 1)) - -/* Priviledged space */ -#define ESPACE_VIRT_END 0 -#define ESPACE_VIRT_START (ESPACE_VIRT_END-(1ULL << (VIRTUAL_ADDRESS_BITS-1))) - -/* reservations in e-space */ -#define GUEST_RESERVED_PML4S 16 -#define XEN_RESERVED_PML4S 16 - -#define MAX_MEMORY_SIZE ((1ULL << (VIRTUAL_ADDRESS_BITS-1)) \ - -((GUEST_RESERVED_PML4S + XEN_RESERVED_PML4S) * PML4_SPACE)) -#define MAX_MEMORY_FRAMES (MAX_MEMORY_SIZE / XEN_PAGE_SIZE) - -/* - * Virtual addresses beyond this are not modifiable by guest OSes. - */ -#define HYPERVISOR_VIRT_START ESPACE_VIRT_START -#define HYPERVISOR_VIRT_END (ESPACE_VIRT_END-(GUEST_RESERVED_PML4S * PML4_SPACE)) - -/* First 512GB of virtual address space is used as a linear p.t. mapping. 
*/ -#define LINEAR_PT_VIRT_START (HYPERVISOR_VIRT_START) -#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES)) - -/* Reserve some space for a shadow PT mapping */ -#define SHADOW_PT_VIRT_START (LINEAR_PT_VIRT_END) -#define SHADOW_PT_VIRT_END (SHADOW_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES)) - -/* Xen exists in the first 1GB of the next PML4 space */ -#define MAX_MONITOR_ADDRESS (1 * 1024 * 1024 * 1024) -#define MONITOR_VIRT_START (SHADOW_PT_VIRT_END) -#define MONITOR_VIRT_END (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS) - -/* Next 1GB of virtual address space used for per-domain mappings (eg. GDT). */ -#define PERDOMAIN_VIRT_START (MONITOR_VIRT_END) -#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (512 * 512 * 4096)) -#define GDT_VIRT_START (PERDOMAIN_VIRT_START) -#define GDT_VIRT_END (GDT_VIRT_START + (128*1024)) -#define LDT_VIRT_START (GDT_VIRT_END) -#define LDT_VIRT_END (LDT_VIRT_START + (128*1024)) - -/* - * First set of MPTs are mapped read-only for all. It's for the machine->physical - * mapping table (MPT table). The following are virtual addresses. - */ -#define READONLY_MPT_VIRT_START (PERDOMAIN_VIRT_END) -#define READONLY_MPT_VIRT_END (READONLY_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES)) - -/* R/W machine->physical table */ -#define RDWR_MPT_VIRT_START (READONLY_MPT_VIRT_END) -#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES)) - -/* Frame table */ -#define FRAMETABLE_ENTRY_SIZE (48) -#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END) -#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + (FRAMETABLE_ENTRY_SIZE * MAX_MEMORY_FRAMES)) - -/* Next 1GB of virtual address space used for ioremap(). */ -#define IOREMAP_VIRT_START (FRAMETABLE_VIRT_END) -#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (512 * 512 * 4096)) - -/* And the virtual addresses for the direct-map region... 
*/ -#define DIRECTMAP_VIRT_START (ESPACE_VIRT_START + (XEN_RESERVED_PML4S * PML4_SPACE)) -#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS) - -/* - * Next is the direct-mapped memory region. The following are machine addresses. - */ -#define MAX_DMA_ADDRESS (16*1024*1024) -#define MAX_DIRECTMAP_ADDRESS MAX_MEMORY_SIZE - - - -/* - * Amount of slack domain memory to leave in system, in kilobytes. - * Prevents a hard out-of-memory crunch for thinsg like network receive. - */ -#define SLACK_DOMAIN_MEM_KILOBYTES 2048 - - -/* - * These will probably change in the future.. - * locations for 32-bit guest compatibility mappings - */ - -/* 4M of 32-bit machine-physical shadow in low 4G of VM space */ -#define SHADOW_MPT32_VIRT_START (0xfc000000) -#define SHADOW_MPT32_VIRT_END (SHADOW_MPT32_VIRT_START + (4 * 1024 * 1024)) - -/* 44M of I/O remap for 32-bit drivers */ -#define IOREMAP_LOW_VIRT_START (SHADOW_MPT32_VIRT_END) -#define IOREMAP_LOW_VIRT_END (IOREMAP_LOW_VIRT_START + (44 * 1024 * 1024)) - -/* 4M of 32-bit page table */ -#define SHADOW_PT32_VIRT_START (IOREMAP_LOW_VIRT_END) -#define SHADOW_PT32_VIRT_END (SHADOW_PT32_VIRT_START + (4 * 1024 * 1024)) - - -/* Linkage for x86 */ -#define FASTCALL(x) x __attribute__((regparm(3))) -#define asmlinkage __attribute__((regparm(0))) -#define __ALIGN .align 16,0x90 -#define __ALIGN_STR ".align 16,0x90" -#define SYMBOL_NAME_STR(X) #X -#define SYMBOL_NAME(X) X -#define SYMBOL_NAME_LABEL(X) X##: -#ifdef __ASSEMBLY__ -#define ALIGN __ALIGN -#define ALIGN_STR __ALIGN_STR -#define ENTRY(name) \ - .globl SYMBOL_NAME(name); \ - ALIGN; \ - SYMBOL_NAME_LABEL(name) -#endif - -#define PGT_base_page_table PGT_l4_page_table - -#define barrier() __asm__ __volatile__("": : :"memory") - -/* - * Hypervisor segment selectors - */ -#define __HYPERVISOR_CS64 0x0810 -#define __HYPERVISOR_CS32 0x0808 -#define __HYPERVISOR_DS 0x0818 - -#define NR_syscalls 256 - -#ifndef NDEBUG -#define MEMORY_GUARD -#endif - -#ifndef __ASSEMBLY__ 
-extern unsigned long _end; /* standard ELF symbol */ -extern void __out_of_line_bug(int line) __attribute__((noreturn)); -#define out_of_line_bug() __out_of_line_bug(__LINE__) -#endif /* __ASSEMBLY__ */ - -#endif /* __XEN_X86_64_CONFIG_H__ */ diff --git a/xen/include/asm-x86_64/cpufeature.h b/xen/include/asm-x86_64/cpufeature.h deleted file mode 100644 index 7d9f90e813..0000000000 --- a/xen/include/asm-x86_64/cpufeature.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * cpufeature.h - * - * Defines x86 CPU feature bits - */ - -#ifndef __ASM_X8664_CPUFEATURE_H -#define __ASM_X8664_CPUFEATURE_H - -/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ -#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) - -#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */ - -/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */ -#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ -#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ -#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ -#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ -#define 
X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ -#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ - /* of FPU context), and CR4.OSFXSR available */ -#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ -#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ -#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ -#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ -#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ - -/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ -/* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! 
*/ - -/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ - -/* Other features, Linux-defined mapping, word 3 */ -/* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -#endif /* __ASM_X8664_CPUFEATURE_H */ - -/* - * Local Variables: - * mode:c - * comment-column:42 - * End: - */ diff --git a/xen/include/asm-x86_64/current.h b/xen/include/asm-x86_64/current.h deleted file mode 100644 index d5ffb0720a..0000000000 --- a/xen/include/asm-x86_64/current.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef _X86_64_CURRENT_H -#define _X86_64_CURRENT_H - -#if !defined(__ASSEMBLY__) -struct task_struct; - -#include - -#define STACK_RESERVED \ - (sizeof(execution_context_t)) - -static inline struct task_struct * get_current(void) -{ - struct task_struct *current; - current = read_pda(pcurrent); - return current; -} - -#define current get_current() - -static inline void set_current(struct task_struct *p) -{ - write_pda(pcurrent,p); -} - -static inline execution_context_t *get_execution_context(void) -{ - execution_context_t *execution_context; - __asm__( "andq %%rsp,%0; addq %2,%0" - : "=r" (execution_context) - : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); - return execution_context; -} - -static inline unsigned long get_stack_top(void) -{ - unsigned long p; - __asm__ ( "orq %%rsp,%0; andq $~7,%0" - : "=r" (p) : "0" (STACK_SIZE-8) ); - return p; -} - -#define schedule_tail(_p) \ - __asm__ __volatile__ ( \ - "andq %%rsp,%0; addq %2,%0; movq %0,%%rsp; jmp *%1" \ - : : 
"r" (~(STACK_SIZE-1)), \ - "r" (unlikely(is_idle_task((_p))) ? \ - continue_cpu_idle_loop : \ - continue_nonidle_task), \ - "i" (STACK_SIZE-STACK_RESERVED) ) - - -#else - -#ifndef ASM_OFFSET_H -#include -#endif - -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg - -#endif - -#endif /* !(_X86_64_CURRENT_H) */ diff --git a/xen/include/asm-x86_64/debugreg.h b/xen/include/asm-x86_64/debugreg.h deleted file mode 100644 index bd1aab1d8c..0000000000 --- a/xen/include/asm-x86_64/debugreg.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _X86_64_DEBUGREG_H -#define _X86_64_DEBUGREG_H - - -/* Indicate the register numbers for a number of the specific - debug registers. Registers 0-3 contain the addresses we wish to trap on */ -#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ -#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ - -#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ -#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ - -/* Define a few things for the status register. We can use this to determine - which debugging register was responsible for the trap. The other bits - are either reserved or not of interest to us. */ - -#define DR_TRAP0 (0x1) /* db0 */ -#define DR_TRAP1 (0x2) /* db1 */ -#define DR_TRAP2 (0x4) /* db2 */ -#define DR_TRAP3 (0x8) /* db3 */ - -#define DR_STEP (0x4000) /* single-step */ -#define DR_SWITCH (0x8000) /* task switch */ - -/* Now define a bunch of things for manipulating the control register. 
- The top two bytes of the control register consist of 4 fields of 4 - bits - each field corresponds to one of the four debug registers, - and indicates what types of access we trap on, and how large the data - field is that we are looking at */ - -#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ -#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ - -#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ -#define DR_RW_WRITE (0x1) -#define DR_RW_READ (0x3) - -#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ -#define DR_LEN_2 (0x4) -#define DR_LEN_4 (0xC) -#define DR_LEN_8 (0x8) - -/* The low byte to the control register determine which registers are - enabled. There are 4 fields of two bits. One bit is "local", meaning - that the processor will reset the bit after a task switch and the other - is global meaning that we have to explicitly reset the bit. With linux, - you can use either one, since we explicitly zero the register when we enter - kernel mode. */ - -#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ -#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ -#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ - -#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ -#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ - -/* The second byte to the control register has a few special things. - We can slow the instruction pipeline for instructions coming via the - gdt or the ldt if we want to. 
I am not sure why this is an advantage */ - -#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */ -#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ -#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ - -#endif diff --git a/xen/include/asm-x86_64/delay.h b/xen/include/asm-x86_64/delay.h deleted file mode 100644 index a04cdb4346..0000000000 --- a/xen/include/asm-x86_64/delay.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _X86_64_DELAY_H -#define _X86_64_DELAY_H - -/* - * Copyright (C) 1993 Linus Torvalds - * - * Delay routines calling functions in arch/i386/lib/delay.c - */ - -extern unsigned long ticks_per_usec; -extern void __udelay(unsigned long usecs); -#define udelay(n) __udelay(n) - -#endif /* defined(_X86_64_DELAY_H) */ diff --git a/xen/include/asm-x86_64/desc.h b/xen/include/asm-x86_64/desc.h deleted file mode 100644 index e8556e976e..0000000000 --- a/xen/include/asm-x86_64/desc.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef __ARCH_DESC_H -#define __ARCH_DESC_H - -#define LDT_ENTRY_SIZE 16 - -#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY - -#define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8) - -#define __CPU_DESC_INDEX(x,field) \ - ((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (__FIRST_PER_CPU_ENTRY*8)) -#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY) - -#define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss))); -#define __load_LDT(cpu) asm volatile("lldt %w0"::"r" (__CPU_DESC_INDEX(cpu, ldt))); -#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0)) - -/* - * Guest OS must provide its own code selectors, or use the one we provide. The - * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector - * value is okay. Note that checking only the RPL is insufficient: if the - * selector is poked into an interrupt, trap or call gate then the RPL is - * ignored when the gate is accessed. 
- */ -#define VALID_SEL(_s) \ - (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \ - (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ - ((_s)&4)) && \ - (((_s)&3) == 0)) -#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS || VALID_SEL(_s)) - -/* These are bitmasks for the first 32 bits of a descriptor table entry. */ -#define _SEGMENT_TYPE (15<< 8) -#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */ -#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */ -#define _SEGMENT_P ( 1<<15) /* Segment Present */ -#define _SEGMENT_G ( 1<<23) /* Granularity */ - -#ifndef __ASSEMBLY__ - -enum { - GATE_INTERRUPT = 0xE, - GATE_TRAP = 0xF, - GATE_CALL = 0xC, -}; - -// 16byte gate -struct gate_struct { - u16 offset_low; - u16 segment; - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; - u16 offset_middle; - u32 offset_high; - u32 zero1; -} __attribute__((packed)); - -// 8 byte segment descriptor -struct desc_struct { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; -} __attribute__((packed)); - -// LDT or TSS descriptor in the GDT. 16 bytes. 
-struct ldttss_desc { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; - u32 base3; - u32 zero1; -} __attribute__((packed)); - -// Union of above structures -union desc_union { - struct desc_struct seg; - struct ldttss_desc ldttss; - struct gate_struct gate; -}; - -struct per_cpu_gdt { - struct ldttss_desc tss; - struct ldttss_desc ldt; -} ____cacheline_aligned; - - -struct Xgt_desc_struct { - unsigned short size; - unsigned long address; -} __attribute__((packed)); - -extern __u8 gdt_table[]; -extern __u8 gdt_end[]; -extern union desc_union *gdt; - -extern struct per_cpu_gdt gdt_cpu_table[]; - -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) - -enum { - DESC_TSS = 0x9, - DESC_LDT = 0x2, -}; - -extern struct gate_struct *idt; - -#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) -#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) - -extern void set_intr_gate(unsigned int irq, void * addr); -extern void set_tss_desc(unsigned int n, void *addr); - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/xen/include/asm-x86_64/dma.h b/xen/include/asm-x86_64/dma.h deleted file mode 100644 index e0854628d3..0000000000 --- a/xen/include/asm-x86_64/dma.h +++ /dev/null @@ -1,301 +0,0 @@ -/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ - * linux/include/asm/dma.h: Defines for using and allocating dma channels. - * Written by Hennus Bergman, 1992. - * High DMA channel support & info by Hannu Savolainen - * and John Boyd, Nov. 1992. 
- */ - -#ifndef _ASM_DMA_H -#define _ASM_DMA_H - -#include -#include /* And spinlocks */ -#include /* need byte IO */ -#include - - -#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER -#define dma_outb outb_p -#else -#define dma_outb outb -#endif - -#define dma_inb inb - -/* - * NOTES about DMA transfers: - * - * controller 1: channels 0-3, byte operations, ports 00-1F - * controller 2: channels 4-7, word operations, ports C0-DF - * - * - ALL registers are 8 bits only, regardless of transfer size - * - channel 4 is not used - cascades 1 into 2. - * - channels 0-3 are byte - addresses/counts are for physical bytes - * - channels 5-7 are word - addresses/counts are for physical words - * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries - * - transfer count loaded to registers is 1 less than actual count - * - controller 2 offsets are all even (2x offsets for controller 1) - * - page registers for 5-7 don't use data bit 0, represent 128K pages - * - page registers for 0-3 use bit 0, represent 64K pages - * - * DMA transfers are limited to the lower 16MB of _physical_ memory. - * Note that addresses loaded into registers must be _physical_ addresses, - * not logical addresses (which may differ if paging is active). - * - * Address mapping for channels 0-3: - * - * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * P7 ... P0 A7 ... A0 A7 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Address mapping for channels 5-7: - * - * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) - * | ... | \ \ ... \ \ \ ... \ \ - * | ... | \ \ ... \ \ \ ... \ (not used) - * | ... | \ \ ... \ \ \ ... \ - * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... 
A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses - * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at - * the hardware level, so odd-byte transfers aren't possible). - * - * Transfer count (_not # bytes_) is limited to 64K, represented as actual - * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, - * and up to 128K bytes may be transferred on channels 5-7 in one operation. - * - */ - -#define MAX_DMA_CHANNELS 8 - -#if 0 -/* The maximum address that we can perform a DMA transfer to on this platform */ -#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) -#endif - - -/* 8237 DMA controllers */ -#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ -#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ - -/* DMA controller registers */ -#define DMA1_CMD_REG 0x08 /* command register (w) */ -#define DMA1_STAT_REG 0x08 /* status register (r) */ -#define DMA1_REQ_REG 0x09 /* request register (w) */ -#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ -#define DMA1_MODE_REG 0x0B /* mode register (w) */ -#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ -#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ -#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ -#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ -#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ - -#define DMA2_CMD_REG 0xD0 /* command register (w) */ -#define DMA2_STAT_REG 0xD0 /* status register (r) */ -#define DMA2_REQ_REG 0xD2 /* request register (w) */ -#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ -#define DMA2_MODE_REG 0xD6 /* mode register (w) */ -#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ -#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ -#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ -#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ -#define DMA2_MASK_ALL_REG 0xDE /* 
all-channels mask (w) */ - -#define DMA_ADDR_0 0x00 /* DMA address registers */ -#define DMA_ADDR_1 0x02 -#define DMA_ADDR_2 0x04 -#define DMA_ADDR_3 0x06 -#define DMA_ADDR_4 0xC0 -#define DMA_ADDR_5 0xC4 -#define DMA_ADDR_6 0xC8 -#define DMA_ADDR_7 0xCC - -#define DMA_CNT_0 0x01 /* DMA count registers */ -#define DMA_CNT_1 0x03 -#define DMA_CNT_2 0x05 -#define DMA_CNT_3 0x07 -#define DMA_CNT_4 0xC2 -#define DMA_CNT_5 0xC6 -#define DMA_CNT_6 0xCA -#define DMA_CNT_7 0xCE - -#define DMA_PAGE_0 0x87 /* DMA page registers */ -#define DMA_PAGE_1 0x83 -#define DMA_PAGE_2 0x81 -#define DMA_PAGE_3 0x82 -#define DMA_PAGE_5 0x8B -#define DMA_PAGE_6 0x89 -#define DMA_PAGE_7 0x8A - -#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ -#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ -#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ - -#define DMA_AUTOINIT 0x10 - - -extern spinlock_t dma_spin_lock; - -static __inline__ unsigned long claim_dma_lock(void) -{ - unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); - return flags; -} - -static __inline__ void release_dma_lock(unsigned long flags) -{ - spin_unlock_irqrestore(&dma_spin_lock, flags); -} - -/* enable/disable a specific DMA channel */ -static __inline__ void enable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr, DMA1_MASK_REG); - else - dma_outb(dmanr & 3, DMA2_MASK_REG); -} - -static __inline__ void disable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr | 4, DMA1_MASK_REG); - else - dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); -} - -/* Clear the 'DMA Pointer Flip Flop'. - * Write 0 for LSB/MSB, 1 for MSB/LSB access. - * Use this once to initialize the FF to a known state. - * After that, keep track of it. :-) - * --- In order to do that, the DMA routines below should --- - * --- only be used while holding the DMA lock ! 
--- - */ -static __inline__ void clear_dma_ff(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(0, DMA1_CLEAR_FF_REG); - else - dma_outb(0, DMA2_CLEAR_FF_REG); -} - -/* set mode (above) for a specific DMA channel */ -static __inline__ void set_dma_mode(unsigned int dmanr, char mode) -{ - if (dmanr<=3) - dma_outb(mode | dmanr, DMA1_MODE_REG); - else - dma_outb(mode | (dmanr&3), DMA2_MODE_REG); -} - -/* Set only the page register bits of the transfer address. - * This is used for successive transfers when we know the contents of - * the lower 16 bits of the DMA current address register, but a 64k boundary - * may have been crossed. - */ -static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) -{ - switch(dmanr) { - case 0: - dma_outb(pagenr, DMA_PAGE_0); - break; - case 1: - dma_outb(pagenr, DMA_PAGE_1); - break; - case 2: - dma_outb(pagenr, DMA_PAGE_2); - break; - case 3: - dma_outb(pagenr, DMA_PAGE_3); - break; - case 5: - dma_outb(pagenr & 0xfe, DMA_PAGE_5); - break; - case 6: - dma_outb(pagenr & 0xfe, DMA_PAGE_6); - break; - case 7: - dma_outb(pagenr & 0xfe, DMA_PAGE_7); - break; - } -} - - -/* Set transfer address & page bits for specific DMA channel. - * Assumes dma flipflop is clear. - */ -static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) -{ - set_dma_page(dmanr, a>>16); - if (dmanr <= 3) { - dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - } else { - dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - } -} - - -/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for - * a specific DMA channel. - * You must ensure the parameters are valid. - * NOTE: from a manual: "the number of transfers is one more - * than the initial word count"! This is taken into account. - * Assumes dma flip-flop is clear. - * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. 
- */ -static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) -{ - count--; - if (dmanr <= 3) { - dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - } else { - dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - } -} - - -/* Get DMA residue count. After a DMA transfer, this - * should return zero. Reading this while a DMA transfer is - * still in progress will return unpredictable results. - * If called before the channel has been used, it may return 1. - * Otherwise, it returns the number of _bytes_ left to transfer. - * - * Assumes DMA flip-flop is clear. - */ -static __inline__ int get_dma_residue(unsigned int dmanr) -{ - unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE - : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; - - /* using short to get 16-bit wrap around */ - unsigned short count; - - count = 1 + dma_inb(io_port); - count += dma_inb(io_port) << 8; - - return (dmanr<=3)? count : (count<<1); -} - - -/* These are in kernel/dma.c: */ -extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ -extern void free_dma(unsigned int dmanr); /* release it again */ - -/* From PCI */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif - -#endif /* _ASM_DMA_H */ diff --git a/xen/include/asm-x86_64/domain_page.h b/xen/include/asm-x86_64/domain_page.h deleted file mode 100644 index f093ee2d96..0000000000 --- a/xen/include/asm-x86_64/domain_page.h +++ /dev/null @@ -1,27 +0,0 @@ -/****************************************************************************** - * domain_page.h - * - * Allow temporary mapping of domain page frames into Xen space. 
- */ - -#ifndef __ASM_DOMAIN_PAGE_H__ -#define __ASM_DOMAIN_PAGE_H__ - -#include -#include -#include - -/* - * Maps a given physical address, returning corresponding virtual address. - * The entire page containing that VA is now accessible until a - * corresponding call to unmap_domain_mem(). - */ -#define map_domain_mem(pa) __va(pa) - -/* - * Pass a VA within a page previously mapped with map_domain_mem(). - * That page will then be removed from the mapping lists. - */ -#define unmap_domain_mem(va) {} - -#endif /* __ASM_DOMAIN_PAGE_H__ */ diff --git a/xen/include/asm-x86_64/fixmap.h b/xen/include/asm-x86_64/fixmap.h deleted file mode 100644 index d3f9803af4..0000000000 --- a/xen/include/asm-x86_64/fixmap.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H - -#include -#include -#include - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. 
- */ - -/* - * on UP currently we will have no trace of the fixmap mechanizm, - * no page table allocations, etc. This might change in the - * future, say framebuffers for the console driver(s) could be - * fix-mapped? - */ -enum fixed_addresses { -#ifdef CONFIG_X86_LOCAL_APIC - FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ -#endif -#ifdef CONFIG_X86_IO_APIC - FIX_IO_APIC_BASE_0, - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, -#endif -#ifdef CONFIG_HIGHMEM - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif - __end_of_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - l1_pgentry_t entry); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR)) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE)) -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ -#define FIXADDR_TOP (0xffffffffffffe000UL) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). 
- * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -#endif diff --git a/xen/include/asm-x86_64/flushtlb.h b/xen/include/asm-x86_64/flushtlb.h deleted file mode 100644 index f0d4bb946c..0000000000 --- a/xen/include/asm-x86_64/flushtlb.h +++ /dev/null @@ -1,49 +0,0 @@ -/****************************************************************************** - * flushtlb.h - * - * TLB flushes are timestamped using a global virtual 'clock' which ticks - * on any TLB flush on any processor. - * - * Copyright (c) 2003, K A Fraser - */ - -#ifndef __FLUSHTLB_H__ -#define __FLUSHTLB_H__ - -#include - -/* - * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed. - * Therefore, if the current TLB time and a previously-read timestamp differ - * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock - * has wrapped at least once and every CPU's TLB is guaranteed to have been - * flushed meanwhile. - * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock. - */ -#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1) - -/* - * 'cpu_stamp' is the current timestamp for the CPU we are testing. - * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last - * used for a purpose that may have caused the CPU's TLB to become tainted. - */ -static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp) -{ - /* - * Why does this work? - * 1. XOR sets high-order bits determines if stamps from differing epochs. - * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'. - * In either case a flush is unnecessary: we therefore OR the results from - * (1) and (2), mask the high-order bits, and return the inverse. 
- */ - return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) & - ~TLBCLOCK_EPOCH_MASK); -} - -extern u32 tlbflush_clock; -extern u32 tlbflush_time[NR_CPUS]; - -extern void tlb_clocktick(void); -extern void new_tlbflush_clock_period(void); - -#endif /* __FLUSHTLB_H__ */ diff --git a/xen/include/asm-x86_64/hardirq.h b/xen/include/asm-x86_64/hardirq.h deleted file mode 100644 index c59f6e3e0f..0000000000 --- a/xen/include/asm-x86_64/hardirq.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef __ASM_HARDIRQ_H -#define __ASM_HARDIRQ_H - -#include -#include - -/* assembly code in softirq.h is sensitive to the offsets of these fields */ -typedef struct { - unsigned int __softirq_pending; - unsigned int __local_irq_count; - unsigned int __local_bh_count; - unsigned int __syscall_count; - unsigned int __nmi_count; - unsigned long idle_timestamp; -} ____cacheline_aligned irq_cpustat_t; - -#include /* Standard mappings for irq_cpustat_t above */ - -/* - * Are we in an interrupt context? Either doing bottom half - * or hardware interrupt processing? - */ -#define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) - -#define in_irq() (local_irq_count(smp_processor_id()) != 0) - -#ifndef CONFIG_SMP - -#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) -#define hardirq_endlock(cpu) do { } while (0) - -#define irq_enter(cpu, irq) (local_irq_count(cpu)++) -#define irq_exit(cpu, irq) (local_irq_count(cpu)--) - -#define synchronize_irq() barrier() - -#else - -#include -#include - -extern unsigned char global_irq_holder; -extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */ - -static inline int irqs_running (void) -{ - int i; - - for (i = 0; i < smp_num_cpus; i++) - if (local_irq_count(i)) - return 1; - return 0; -} - -static inline void release_irqlock(int cpu) -{ - /* if we didn't own the irq lock, just ignore.. 
*/ - if (global_irq_holder == (unsigned char) cpu) { - global_irq_holder = 0xff; - clear_bit(0,&global_irq_lock); - } -} - -static inline void irq_enter(int cpu, int irq) -{ - ++local_irq_count(cpu); - - while (test_bit(0,&global_irq_lock)) { - cpu_relax(); - } -} - -static inline void irq_exit(int cpu, int irq) -{ - --local_irq_count(cpu); -} - -static inline int hardirq_trylock(int cpu) -{ - return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock); -} - -#define hardirq_endlock(cpu) do { } while (0) - -extern void synchronize_irq(void); - -#endif /* CONFIG_SMP */ - -#endif /* __ASM_HARDIRQ_H */ diff --git a/xen/include/asm-x86_64/hdreg.h b/xen/include/asm-x86_64/hdreg.h deleted file mode 100644 index 18561aaed3..0000000000 --- a/xen/include/asm-x86_64/hdreg.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * linux/include/asm-x86_64/hdreg.h - * - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -#ifndef __ASMx86_64_HDREG_H -#define __ASMx86_64_HDREG_H - -//typedef unsigned short ide_ioreg_t; -typedef unsigned long ide_ioreg_t; - -#endif /* __ASMx86_64_HDREG_H */ diff --git a/xen/include/asm-x86_64/i387.h b/xen/include/asm-x86_64/i387.h deleted file mode 100644 index 95a6bb6cde..0000000000 --- a/xen/include/asm-x86_64/i387.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * include/asm-i386/i387.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - */ - -#ifndef __ASM_I386_I387_H -#define __ASM_I386_I387_H - -#include -#include - -extern void init_fpu(void); -extern void save_init_fpu( struct task_struct *tsk ); -extern void restore_fpu( struct task_struct *tsk ); - -#define unlazy_fpu( tsk ) do { \ - if ( test_bit(PF_USEDFPU, &tsk->flags) ) \ - save_init_fpu( tsk ); \ -} while (0) - -#define clear_fpu( tsk ) do { \ - if ( test_and_clear_bit(PF_USEDFPU, &tsk->flags) ) { \ - asm volatile("fwait"); \ - stts(); \ - } \ -} while (0) - -#define load_mxcsr( val ) do { \ - unsigned 
long __mxcsr = ((unsigned long)(val) & 0xffbf); \ - asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ -} while (0) - -#endif /* __ASM_I386_I387_H */ diff --git a/xen/include/asm-x86_64/ide.h b/xen/include/asm-x86_64/ide.h deleted file mode 100644 index 05de458761..0000000000 --- a/xen/include/asm-x86_64/ide.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * linux/include/asm-x86_64/ide.h - * - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -/* - * This file contains the x86_64 architecture specific IDE code. - */ - -#ifndef __ASMx86_64_IDE_H -#define __ASMx86_64_IDE_H - -#ifdef __KERNEL__ - -#include - -#ifndef MAX_HWIFS -# ifdef CONFIG_BLK_DEV_IDEPCI -#define MAX_HWIFS 10 -# else -#define MAX_HWIFS 6 -# endif -#endif - -static __inline__ int ide_default_irq(ide_ioreg_t base) -{ - switch (base) { - case 0x1f0: return 14; - case 0x170: return 15; - case 0x1e8: return 11; - case 0x168: return 10; - case 0x1e0: return 8; - case 0x160: return 12; - default: - return 0; - } -} - -static __inline__ ide_ioreg_t ide_default_io_base(int index) -{ - switch (index) { - case 0: return 0x1f0; - case 1: return 0x170; - case 2: return 0x1e8; - case 3: return 0x168; - case 4: return 0x1e0; - case 5: return 0x160; - default: - return 0; - } -} - -static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq) -{ - ide_ioreg_t reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_BLK_DEV_IDEPCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - memset(&hw, 0, sizeof hw); - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); 
- hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } -#endif /* CONFIG_BLK_DEV_IDEPCI */ -} - -typedef union { - unsigned all : 8; /* all of the bits together */ - struct { - unsigned head : 4; /* always zeros here */ - unsigned unit : 1; /* drive select number, 0 or 1 */ - unsigned bit5 : 1; /* always 1 */ - unsigned lba : 1; /* using LBA instead of CHS */ - unsigned bit7 : 1; /* always 1 */ - } b; -} select_t; - -typedef union { - unsigned all : 8; /* all of the bits together */ - struct { - unsigned bit0 : 1; - unsigned nIEN : 1; /* device INTRQ to host */ - unsigned SRST : 1; /* host soft reset bit */ - unsigned bit3 : 1; /* ATA-2 thingy */ - unsigned reserved456 : 3; - unsigned HOB : 1; /* 48-bit address ordering */ - } b; -} control_t; - -#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) -#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) -#define ide_check_region(from,extent) check_region((from), (extent)) -#define ide_request_region(from,extent,name) request_region((from), (extent), (name)) -#define ide_release_region(from,extent) release_region((from), (extent)) - -/* - * The following are not needed for the non-m68k ports - */ -#define ide_ack_intr(hwif) (1) -#define ide_fix_driveid(id) do {} while (0) -#define ide_release_lock(lock) do {} while (0) -#define ide_get_lock(lock, hdlr, data) do {} while (0) - -#endif /* __KERNEL__ */ - -#endif /* __ASMx86_64_IDE_H */ diff --git a/xen/include/asm-x86_64/io.h b/xen/include/asm-x86_64/io.h deleted file mode 100644 index 35ec64235e..0000000000 --- a/xen/include/asm-x86_64/io.h +++ /dev/null @@ -1,273 +0,0 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H - -#include -#include - -/* - * This file contains the definitions for the x86 IO instructions - * inb/inw/inl/outb/outw/outl and the "string versions" of the same - * (insb/insw/insl/outsb/outsw/outsl). 
You can also use "pausing" - * versions of the single-IO instructions (inb_p/inw_p/..). - * - * This file is not meant to be obfuscating: it's just complicated - * to (a) handle it all in a way that makes gcc able to optimize it - * as well as possible and (b) trying to avoid writing the same thing - * over and over again with slight variations and possibly making a - * mistake somewhere. - */ - -/* - * Thanks to James van Artsdalen for a better timing-fix than - * the two short jumps: using outb's to a nonexistent port seems - * to guarantee better timings even on fast machines. - * - * On the other hand, I'd like to be sure of a non-existent port: - * I feel a bit unsafe about using 0x80 (should be safe, though) - * - * Linus - */ - - /* - * Bit simplified and optimized by Jan Hubicka - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. - * - * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, - * isa_read[wl] and isa_write[wl] fixed - * - Arnaldo Carvalho de Melo - */ - -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" -#else -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" -#endif - -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO -#endif - -/* - * Talk about misusing macros.. - */ -#define __OUT1(s,x) \ -extern inline void out##s(unsigned x value, unsigned short port) { - -#define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" - -#define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ - -#define __IN1(s) \ -extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; - -#define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" - -#define __IN(s,s1,i...) 
\ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ - -#define __INS(s) \ -extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; ins" #s \ -: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define __OUTS(s) \ -extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ -{ __asm__ __volatile__ ("rep ; outs" #s \ -: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } - -#define RETURN_TYPE unsigned char -__IN(b,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned short -__IN(w,"") -#undef RETURN_TYPE -#define RETURN_TYPE unsigned int -__IN(l,"") -#undef RETURN_TYPE - -__OUT(b,"b",char) -__OUT(w,"w",short) -__OUT(l,,int) - -__INS(b) -__INS(w) -__INS(l) - -__OUTS(b) -__OUTS(w) -__OUTS(l) - -#define IO_SPACE_LIMIT 0xffff - -/* - * Temporary debugging check to catch old code using - * unmapped ISA addresses. Will be removed in 2.4. - */ -#ifdef CONFIG_IO_DEBUG - extern void *__io_virt_debug(unsigned long x, const char *file, int line); - extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); - #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) -//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) -#else - #define __io_virt(x) ((void *)(x)) -//#define __io_phys(x) __pa(x) -#endif - -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -extern inline unsigned long virt_to_phys(volatile void * address) -{ - return __pa(address); -} - -extern inline void * phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * Change "struct page" to physical address. 
- */ -#ifdef CONFIG_DISCONTIGMEM -#include -#else -#define page_to_phys(page) (((page) - frame_table) << PAGE_SHIFT) -#endif - -#define page_to_pfn(page) ((unsigned long)((_page) - frame_table)) -#define page_to_virt(page) (phys_to_virt(page_to_phys(_page))) - -extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); - -extern inline void * ioremap (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, 0); -} - -/* - * This one maps high address device memory and turns off caching for that area. - * it's useful if some control registers are in such an area and write combining - * or read caching is not desirable: - */ -extern inline void * ioremap_nocache (unsigned long offset, unsigned long size) -{ - return __ioremap(offset, size, _PAGE_PCD); -} - -extern void iounmap(void *addr); - -/* - * IO bus memory addresses are also 1:1 with the physical address - */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt -#define page_to_bus page_to_phys - -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. 
- */ - -#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) -#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) -#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) -#define readq(addr) (*(volatile unsigned long *) __io_virt(addr)) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq - -#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) -#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) -#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) -#define writeq(b,addr) (*(volatile unsigned long *) __io_virt(addr) = (b)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq - -void *memcpy_fromio(void*,const void*,unsigned); -void *memcpy_toio(void*,const void*,unsigned); - -#define memset_io(a,b,c) memset(__io_virt(a),(b),(c)) - -/* - * ISA space is 'always mapped' on a typical x86 system, no need to - * explicitly ioremap() it. The fact that the ISA IO space is mapped - * to PAGE_OFFSET is pure coincidence - it does not mean ISA values - * are physical addresses. 
The following constant pointer can be - * used as the IO-area pointer (it can be iounmapped as well, so the - * analogy with PCI is quite large): - */ -#define __ISA_IO_base ((char *)(PAGE_OFFSET)) - -#define isa_readb(a) readb(__ISA_IO_base + (a)) -#define isa_readw(a) readw(__ISA_IO_base + (a)) -#define isa_readl(a) readl(__ISA_IO_base + (a)) -#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) -#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) -#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) -#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) -#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) -#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) - - -/* - * Again, x86-64 does not require mem IO specific function. - */ - -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) -#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) - -static inline int check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -static inline int isa_check_signature(unsigned long io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (isa_readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define dma_cache_wback_inv(_start,_size) do { } while (0) - -#define flush_write_buffers() - -#endif diff --git a/xen/include/asm-x86_64/io_apic.h b/xen/include/asm-x86_64/io_apic.h deleted file mode 100644 index d5d2e4c439..0000000000 --- a/xen/include/asm-x86_64/io_apic.h +++ /dev/null @@ -1,148 
+0,0 @@ -#ifndef __ASM_IO_APIC_H -#define __ASM_IO_APIC_H - -#include -#include - -/* - * Intel IO-APIC support for SMP and UP systems. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar - */ - -#ifdef CONFIG_X86_IO_APIC - -#define APIC_MISMATCH_DEBUG - -#define IO_APIC_BASE(idx) \ - ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) - -/* - * The structure of the IO-APIC: - */ -struct IO_APIC_reg_00 { - __u32 __reserved_2 : 24, - ID : 4, - __reserved_1 : 4; -} __attribute__ ((packed)); - -struct IO_APIC_reg_01 { - __u32 version : 8, - __reserved_2 : 7, - PRQ : 1, - entries : 8, - __reserved_1 : 8; -} __attribute__ ((packed)); - -struct IO_APIC_reg_02 { - __u32 __reserved_2 : 24, - arbitration : 4, - __reserved_1 : 4; -} __attribute__ ((packed)); - -/* - * # of IO-APICs and # of IRQ routing registers - */ -extern int nr_ioapics; -extern int nr_ioapic_registers[MAX_IO_APICS]; - -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - -struct IO_APIC_route_entry { - __u32 vector : 8, - delivery_mode : 3, /* 000: FIXED - * 001: lowest prio - * 111: ExtINT - */ - dest_mode : 1, /* 0: physical, 1: logical */ - delivery_status : 1, - polarity : 1, - irr : 1, - trigger : 1, /* 0: edge, 1: level */ - mask : 1, /* 0: enabled, 1: disabled */ - __reserved_2 : 15; - - union { struct { __u32 - __reserved_1 : 24, - physical_dest : 4, - __reserved_2 : 4; - } physical; - - struct { __u32 - __reserved_1 : 24, - logical_dest : 8; - } logical; - } dest; - -} __attribute__ ((packed)); - -/* - * MP-BIOS irq configuration table structures: - */ - -/* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -extern int mp_irq_entries; - -/* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - 
-/* non-0 if default (table-less) MP configuration */ -extern int mpc_default_type; - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - *IO_APIC_BASE(apic) = reg; - return *(IO_APIC_BASE(apic)+4); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - */ -static inline void io_apic_modify(unsigned int apic, unsigned int value) -{ - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - (void) *(IO_APIC_BASE(apic)+4); -} - -/* 1 if "noapic" boot option passed */ -extern int skip_ioapic_setup; - -/* - * If we use the IO-APIC for IRQ routing, disable automatic - * assignment of PCI IRQ's. - */ -#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup) - -#else /* !CONFIG_X86_IO_APIC */ -#define io_apic_assign_pci_irqs 0 -#endif - -#endif diff --git a/xen/include/asm-x86_64/irq.h b/xen/include/asm-x86_64/irq.h deleted file mode 100644 index bbb83c2d95..0000000000 --- a/xen/include/asm-x86_64/irq.h +++ /dev/null @@ -1,136 +0,0 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */ - -#include -#include - -#define SA_INTERRUPT 0x20000000 -#define SA_SHIRQ 0x04000000 -#define SA_NOPROFILE 0x02000000 - -#define SA_SAMPLE_RANDOM 0 /* Linux driver compatibility */ - -#define TIMER_IRQ 0 - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define NR_VECTORS 256 -#define FIRST_EXTERNAL_VECTOR 0x30 - -#ifdef CONFIG_X86_IO_APIC -#define NR_IRQS 224 -#else 
-#define NR_IRQS 16 -#endif - -#define HYPERVISOR_CALL_VECTOR 0x82 - -/* - * Vectors 0x30-0x3f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define EVENT_CHECK_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb -#define KDB_VECTOR 0xfa -#define TASK_MIGRATION_VECTOR 0xf9 - -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x40-0xee) - * we start at 0x41 to spread out vectors evenly between priority - * levels. 
(0x82 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x41 -#define FIRST_SYSTEM_VECTOR 0xef - -extern int irq_vector[NR_IRQS]; -#define IO_APIC_VECTOR(irq) irq_vector[irq] - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ - -extern void mask_irq(unsigned int irq); -extern void unmask_irq(unsigned int irq); -extern void disable_8259A_irq(unsigned int irq); -extern void enable_8259A_irq(unsigned int irq); -extern int i8259A_irq_pending(unsigned int irq); -extern void make_8259A_irq(unsigned int irq); -extern void init_8259A(int aeoi); -extern void FASTCALL(send_IPI_self(int vector)); -extern void init_VISWS_APIC_irqs(void); -extern void setup_IO_APIC(void); -extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void send_IPI(int dest, int vector); - -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -extern char _stext, _etext; - -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) - -#define __STR(x) #x -#define STR(x) __STR(x) - -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -#define BUILD_IRQ(nr) \ -asmlinkage void IRQ_NAME(nr); \ -__asm__( \ -"\n.p2align\n" \ -SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ - "push $"#nr"-256\n\t" \ - "jmp common_interrupt"); - -extern unsigned long prof_cpu_mask; -extern unsigned int * prof_buffer; -extern unsigned long prof_len; -extern unsigned long prof_shift; - -#include - -#if defined(CONFIG_X86_IO_APIC) -static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { - if (IO_APIC_IRQ(i)) - send_IPI_self(IO_APIC_VECTOR(i)); -} -#else -static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} -#endif - -#endif /* _ASM_HW_IRQ_H */ diff --git 
a/xen/include/asm-x86_64/ldt.h b/xen/include/asm-x86_64/ldt.h deleted file mode 100644 index e0f139829e..0000000000 --- a/xen/include/asm-x86_64/ldt.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __ARCH_LDT_H -#define __ARCH_LDT_H - -#ifndef __ASSEMBLY__ - -static inline void load_LDT(struct task_struct *p) -{ - unsigned long ents; - - if ( (ents = p->mm.ldt_ents) == 0 ) - { - __asm__ __volatile__ ( "lldt %w0" : : "r" (0) ); - } - else - { - unsigned int cpu; - struct ldttss_desc *desc; - - cpu = smp_processor_id(); - desc = (struct ldttss_desc *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt)); - desc->limit0 = ents*8-1; - desc->base0 = LDT_VIRT_START&0xffff; - desc->base1 = (LDT_VIRT_START&0xff0000)>>16; - desc->type = DESC_LDT; - desc->dpl = 0; - desc->p = 1; - desc->limit1 = 0; - desc->zero0 = 0; - desc->g = 0; - desc->base2 = (LDT_VIRT_START&0xff000000)>>24; - desc->base3 = LDT_VIRT_START>>32; - desc->zero1 = 0; - __load_LDT(cpu); - } -} - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/xen/include/asm-x86_64/mc146818rtc.h b/xen/include/asm-x86_64/mc146818rtc.h deleted file mode 100644 index 8758528f7c..0000000000 --- a/xen/include/asm-x86_64/mc146818rtc.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. 
- */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - -#include -#include - -extern spinlock_t rtc_lock; /* serialize CMOS RAM access */ - -/********************************************************************** - * register summary - **********************************************************************/ -#define RTC_SECONDS 0 -#define RTC_SECONDS_ALARM 1 -#define RTC_MINUTES 2 -#define RTC_MINUTES_ALARM 3 -#define RTC_HOURS 4 -#define RTC_HOURS_ALARM 5 -/* RTC_*_alarm is always true if 2 MSBs are set */ -# define RTC_ALARM_DONT_CARE 0xC0 - -#define RTC_DAY_OF_WEEK 6 -#define RTC_DAY_OF_MONTH 7 -#define RTC_MONTH 8 -#define RTC_YEAR 9 - -/* control registers - Moto names - */ -#define RTC_REG_A 10 -#define RTC_REG_B 11 -#define RTC_REG_C 12 -#define RTC_REG_D 13 - -/********************************************************************** - * register details - **********************************************************************/ -#define RTC_FREQ_SELECT RTC_REG_A - -/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus, - * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete, - * totalling to a max high interval of 2.228 ms. - */ -# define RTC_UIP 0x80 -# define RTC_DIV_CTL 0x70 - /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */ -# define RTC_REF_CLCK_4MHZ 0x00 -# define RTC_REF_CLCK_1MHZ 0x10 -# define RTC_REF_CLCK_32KHZ 0x20 - /* 2 values for divider stage reset, others for "testing purposes only" */ -# define RTC_DIV_RESET1 0x60 -# define RTC_DIV_RESET2 0x70 - /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 
15=2Hz */ -# define RTC_RATE_SELECT 0x0F - -/**********************************************************************/ -#define RTC_CONTROL RTC_REG_B -# define RTC_SET 0x80 /* disable updates for clock setting */ -# define RTC_PIE 0x40 /* periodic interrupt enable */ -# define RTC_AIE 0x20 /* alarm interrupt enable */ -# define RTC_UIE 0x10 /* update-finished interrupt enable */ -# define RTC_SQWE 0x08 /* enable square-wave output */ -# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ -# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ -# define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ - -/**********************************************************************/ -#define RTC_INTR_FLAGS RTC_REG_C -/* caution - cleared by read */ -# define RTC_IRQF 0x80 /* any of the following 3 is active */ -# define RTC_PF 0x40 -# define RTC_AF 0x20 -# define RTC_UF 0x10 - -/**********************************************************************/ -#define RTC_VALID RTC_REG_D -# define RTC_VRT 0x80 /* valid RAM and time */ -/**********************************************************************/ - -/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) - * determines if the following two #defines are needed - */ -#ifndef BCD_TO_BIN -#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) -#endif - -#ifndef BIN_TO_BCD -#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) -#endif - - -#ifndef RTC_PORT -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ -#endif - -/* - * The yet supported machines all access the RTC index register via - * an ISA port access but the way to access the date register differs ... 
- */ -#define CMOS_READ(addr) ({ \ -outb_p((addr),RTC_PORT(0)); \ -inb_p(RTC_PORT(1)); \ -}) -#define CMOS_WRITE(val, addr) ({ \ -outb_p((addr),RTC_PORT(0)); \ -outb_p((val),RTC_PORT(1)); \ -}) - -#define RTC_IRQ 8 - -#endif /* _ASM_MC146818RTC_H */ diff --git a/xen/include/asm-x86_64/mpspec.h b/xen/include/asm-x86_64/mpspec.h deleted file mode 100644 index fa5d7aa2df..0000000000 --- a/xen/include/asm-x86_64/mpspec.h +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef __ASM_MPSPEC_H -#define __ASM_MPSPEC_H - - -/* - * Structure definitions for SMP machines following the - * Intel Multiprocessing Specification 1.1 and 1.4. - */ - -/* - * This tag identifies where the SMP configuration - * information is. - */ - -#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') - -/* - * a maximum of 16 APICs with the current APIC ID architecture. - * xAPICs can have up to 256. SAPICs have 16 ID bits. - */ -#ifdef CONFIG_X86_CLUSTERED_APIC -#define MAX_APICS 256 -#else -#define MAX_APICS 16 -#endif - -#define MAX_MPC_ENTRY 1024 - -struct intel_mp_floating -{ - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? 
*/ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ -}; - -struct mp_config_table -{ - char mpc_signature[4]; -#define MPC_SIGNATURE "PCMP" - unsigned short mpc_length; /* Size of table */ - char mpc_spec; /* 0x01 */ - char mpc_checksum; - char mpc_oem[8]; - char mpc_productid[12]; - unsigned int mpc_oemptr; /* 0 if not present */ - unsigned short mpc_oemsize; /* 0 if not present */ - unsigned short mpc_oemcount; - unsigned int mpc_lapic; /* APIC address */ - unsigned int reserved; -}; - -/* Followed by entries */ - -#define MP_PROCESSOR 0 -#define MP_BUS 1 -#define MP_IOAPIC 2 -#define MP_INTSRC 3 -#define MP_LINTSRC 4 -#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */ - -struct mpc_config_processor -{ - unsigned char mpc_type; - unsigned char mpc_apicid; /* Local APIC number */ - unsigned char mpc_apicver; /* Its versions */ - unsigned char mpc_cpuflag; -#define CPU_ENABLED 1 /* Processor is available */ -#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ - unsigned int mpc_cpufeature; -#define CPU_STEPPING_MASK 0x0F -#define CPU_MODEL_MASK 0xF0 -#define CPU_FAMILY_MASK 0xF00 - unsigned int mpc_featureflag; /* CPUID feature value */ - unsigned int mpc_reserved[2]; -}; - -struct mpc_config_bus -{ - unsigned char mpc_type; - unsigned char mpc_busid; - unsigned char mpc_bustype[6] __attribute((packed)); -}; - -/* List of Bus Type string values, Intel MP Spec. 
*/ -#define BUSTYPE_EISA "EISA" -#define BUSTYPE_ISA "ISA" -#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ -#define BUSTYPE_MCA "MCA" -#define BUSTYPE_VL "VL" /* Local bus */ -#define BUSTYPE_PCI "PCI" -#define BUSTYPE_PCMCIA "PCMCIA" -#define BUSTYPE_CBUS "CBUS" -#define BUSTYPE_CBUSII "CBUSII" -#define BUSTYPE_FUTURE "FUTURE" -#define BUSTYPE_MBI "MBI" -#define BUSTYPE_MBII "MBII" -#define BUSTYPE_MPI "MPI" -#define BUSTYPE_MPSA "MPSA" -#define BUSTYPE_NUBUS "NUBUS" -#define BUSTYPE_TC "TC" -#define BUSTYPE_VME "VME" -#define BUSTYPE_XPRESS "XPRESS" - -struct mpc_config_ioapic -{ - unsigned char mpc_type; - unsigned char mpc_apicid; - unsigned char mpc_apicver; - unsigned char mpc_flags; -#define MPC_APIC_USABLE 0x01 - unsigned int mpc_apicaddr; -}; - -struct mpc_config_intsrc -{ - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbus; - unsigned char mpc_srcbusirq; - unsigned char mpc_dstapic; - unsigned char mpc_dstirq; -}; - -enum mp_irq_source_types { - mp_INT = 0, - mp_NMI = 1, - mp_SMI = 2, - mp_ExtINT = 3 -}; - -#define MP_IRQDIR_DEFAULT 0 -#define MP_IRQDIR_HIGH 1 -#define MP_IRQDIR_LOW 3 - - -struct mpc_config_lintsrc -{ - unsigned char mpc_type; - unsigned char mpc_irqtype; - unsigned short mpc_irqflag; - unsigned char mpc_srcbusid; - unsigned char mpc_srcbusirq; - unsigned char mpc_destapic; -#define MP_APIC_ALL 0xFF - unsigned char mpc_destapiclint; -}; - -struct mp_config_oemtable -{ - char oem_signature[4]; -#define MPC_OEM_SIGNATURE "_OEM" - unsigned short oem_length; /* Size of table */ - char oem_rev; /* 0x01 */ - char oem_checksum; - char mpc_oem[8]; -}; - -struct mpc_config_translation -{ - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -/* - * Default configurations - * - * 1 2 CPU ISA 82489DX - * 2 2 CPU EISA 82489DX neither IRQ 
0 timer nor IRQ 13 DMA chaining - * 3 2 CPU EISA 82489DX - * 4 2 CPU MCA 82489DX - * 5 2 CPU ISA+PCI - * 6 2 CPU EISA+PCI - * 7 2 CPU MCA+PCI - */ - -#define MAX_MP_BUSSES 257 -#define MAX_IRQ_SOURCES (MAX_MP_BUSSES*4) -enum mp_bustype { - MP_BUS_ISA = 1, - MP_BUS_EISA, - MP_BUS_PCI, - MP_BUS_MCA -}; -extern int mp_bus_id_to_type [MAX_MP_BUSSES]; -extern int mp_bus_id_to_node [MAX_MP_BUSSES]; -extern int mp_bus_id_to_local [MAX_MP_BUSSES]; -extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; -extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; - -extern unsigned int boot_cpu_physical_apicid; -extern int smp_found_config; -extern void find_smp_config (void); -extern void get_smp_config (void); -extern int apic_version [MAX_APICS]; -extern int mp_current_pci_id; -extern unsigned long mp_lapic_addr; - -#endif - diff --git a/xen/include/asm-x86_64/msr.h b/xen/include/asm-x86_64/msr.h deleted file mode 100644 index f630034630..0000000000 --- a/xen/include/asm-x86_64/msr.h +++ /dev/null @@ -1,166 +0,0 @@ -#ifndef X86_64_MSR_H -#define X86_64_MSR_H 1 - -#ifndef __ASSEMBLY__ -/* - * Access to machine-specific registers (available on 586 and better only) - * Note: the rd* operations modify the parameters directly (without using - * pointer indirection), this allows gcc to optimize better - */ - -#define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) - - -#define rdmsrl(msr,val) do { unsigned long a__,b__; \ - __asm__ __volatile__("rdmsr" \ - : "=a" (a__), "=d" (b__) \ - : "c" (msr)); \ - val = a__ | (b__<<32); \ -} while(0); - -#define wrmsr(msr,val1,val2) \ - __asm__ __volatile__("wrmsr" \ - : /* no outputs */ \ - : "c" (msr), "a" (val1), "d" (val2)) - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -#define rdtscl(low) \ - __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") - -#define rdtscll(val) do { \ - unsigned int a,d; \ - asm volatile("rdtsc" : "=a" (a), "=d" (d)); \ - (val) = 
((unsigned long)a) | (((unsigned long)d)<<32); \ -} while(0) - -#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) - -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) - -#endif - -/* AMD/K8 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit GS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit FS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */ -/* EFER bits: */ -#define _EFER_SCE 0 /* SYSCALL/SYSRET */ -#define _EFER_LME 8 /* Long mode enable */ -#define _EFER_LMA 10 /* Long mode active (read-only) */ -#define _EFER_NX 11 /* No execute enable */ - -#define EFER_SCE (1<<_EFER_SCE) -#define EFER_LME (1< -typedef struct { unsigned long l1_lo; } l1_pgentry_t; -typedef struct { unsigned long l2_lo; } l2_pgentry_t; -typedef struct { unsigned long l3_lo; } l3_pgentry_t; -typedef struct { unsigned long l4_lo; } l4_pgentry_t; -typedef l1_pgentry_t *l1_pagetable_t; -typedef l2_pgentry_t *l2_pagetable_t; -typedef l3_pgentry_t *l3_pagetable_t; -typedef l4_pgentry_t *l4_pagetable_t; -typedef struct { unsigned long pt_lo; } pagetable_t; -typedef struct { unsigned long pgprot; } pgprot_t; -#endif /* !__ASSEMBLY__ */ - -/* Strip type from a table entry. 
*/ -#define l1_pgentry_val(_x) ((_x).l1_lo) -#define l2_pgentry_val(_x) ((_x).l2_lo) -#define l3_pgentry_val(_x) ((_x).l3_lo) -#define l4_pgentry_val(_x) ((_x).l4_lo) -#define pagetable_val(_x) ((_x).pt_lo) - -#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL)) -#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL)) -#define alloc_l3_pagetable() ((l3_pgentry_t *)get_free_page(GFP_KERNEL)) -#define alloc_l4_pagetable() ((l4_pgentry_t *)get_free_page(GFP_KERNEL)) - -/* Add type to a table entry. */ -#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } ) -#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } ) -#define mk_l3_pgentry(_x) ( (l3_pgentry_t) { (_x) } ) -#define mk_l4_pgentry(_x) ( (l4_pgentry_t) { (_x) } ) -#define mk_pagetable(_x) ( (pagetable_t) { (_x) } ) - -/* Turn a typed table entry into a page index. */ -#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT) -#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT) -#define l3_pgentry_to_pagenr(_x) (l3_pgentry_val(_x) >> PAGE_SHIFT) -#define l4_pgentry_to_pagenr(_x) (l4_pgentry_val(_x) >> PAGE_SHIFT) - -/* Turn a typed table entry into a physical address. */ -#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK) -#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK) -#define l3_pgentry_to_phys(_x) (l3_pgentry_val(_x) & PAGE_MASK) -#define l4_pgentry_to_phys(_x) (l4_pgentry_val(_x) & PAGE_MASK) - -/* Dereference a typed level-2 entry to yield a typed level-1 table. */ -#define l2_pgentry_to_l1(_x) \ - ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK)) - -/* Dereference a typed level-4 entry to yield a typed level-3 table. */ -#define l4_pgentry_to_l3(_x) \ - ((l3_pgentry_t *)__va(l4_pgentry_val(_x) & PAGE_MASK)) - -/* Dereference a typed level-3 entry to yield a typed level-2 table. 
*/ -#define l3_pgentry_to_l2(_x) \ - ((l2_pgentry_t *)__va(l3_pgentry_val(_x) & PAGE_MASK)) - -/* Given a virtual address, get an entry offset into a page table. */ -#define l1_table_offset(_a) \ - (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1)) -#define l2_table_offset(_a) \ - (((_a) >> L2_PAGETABLE_SHIFT) & (ENTRIES_PER_L2_PAGETABLE - 1)) -#define l3_table_offset(_a) \ - (((_a) >> L3_PAGETABLE_SHIFT) & (ENTRIES_PER_L3_PAGETABLE - 1)) -#define l4_table_offset(_a) \ - ((_a) >> L4_PAGETABLE_SHIFT) - -/* Hypervisor table entries use zero to sugnify 'empty'. */ -#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x)) -#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x)) -#define l3_pgentry_empty(_x) (!l3_pgentry_val(_x)) -#define l4_pgentry_empty(_x) (!l4_pgentry_val(_x)) - - -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* - * NB. We don't currently track I/O holes in the physical RAM space. - * For now we guess that I/O devices will be mapped in the first 1MB - * (e.g., VGA buffers) or beyond the end of physical RAM. - */ -#define pfn_is_ram(_pfn) (((_pfn) > 0x100) && ((_pfn) < max_page)) - -/* High table entries are reserved by the hypervisor. 
*/ -#define DOMAIN_ENTRIES_PER_L4_PAGETABLE \ - (HYPERVISOR_VIRT_START >> L4_PAGETABLE_SHIFT) -#define HYPERVISOR_ENTRIES_PER_L4_PAGETABLE \ - (ENTRIES_PER_L4_PAGETABLE - DOMAIN_ENTRIES_PER_L4_PAGETABLE) - -#define __START_KERNEL 0xffffffff80100000 -#define __START_KERNEL_map 0xffffffff80000000 -#define __PAGE_OFFSET 0x0000010000000000 -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include - -extern unsigned long vm_stack_flags, vm_stack_flags32; -extern unsigned long vm_data_default_flags, vm_data_default_flags32; -extern unsigned long vm_force_exec32; - -#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START) - -extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE]; -extern void paging_init(void); - -#define __flush_tlb() \ - do { \ - __asm__ __volatile__ ( \ - "movl %%cr3, %%eax; movl %%eax, %%cr3" \ - : : : "memory", "eax" ); \ - tlb_clocktick(); \ - } while ( 0 ) - -/* Flush global pages as well. */ - -#define __pge_off() \ - do { \ - __asm__ __volatile__( \ - "movl %0, %%cr4; # turn off PGE " \ - :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \ - } while (0) - -#define __pge_on() \ - do { \ - __asm__ __volatile__( \ - "movl %0, %%cr4; # turn off PGE " \ - :: "r" (mmu_cr4_features)); \ - } while (0) - - -#define __flush_tlb_pge() \ - do { \ - __pge_off(); \ - __flush_tlb(); \ - __pge_on(); \ - } while (0) - -#define __flush_tlb_one(__addr) \ -__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) - -#include - -/* - * Tell the user there is some problem. The exception handler decodes this frame. 
- */ -struct bug_frame { - unsigned char ud2[2]; - char *filename; /* should use 32bit offset instead, but the assembler doesn't like it */ - unsigned short line; -} __attribute__((packed)); -#define HEADER_BUG() asm volatile("ud2 ; .quad %P1 ; .short %P0" :: "i"(__LINE__), \ - "i" (__stringify(__FILE__))) -#define PAGE_BUG(page) BUG() - -#endif /* ASSEMBLY */ - -#define _PAGE_PRESENT 0x001 -#define _PAGE_RW 0x002 -#define _PAGE_USER 0x004 -#define _PAGE_PWT 0x008 -#define _PAGE_PCD 0x010 -#define _PAGE_ACCESSED 0x020 -#define _PAGE_DIRTY 0x040 -#define _PAGE_PAT 0x080 -#define _PAGE_PSE 0x080 -#define _PAGE_GLOBAL 0x100 - -#define __PAGE_HYPERVISOR \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_HYPERVISOR_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) -#define __PAGE_HYPERVISOR_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) - -#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL) - -#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR) -#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO) -#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE) - -#define mk_l4_writeable(_p) \ - (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) | _PAGE_RW)) -#define mk_l4_readonly(_p) \ - (*(_p) = mk_l4_pgentry(l4_pgentry_val(*(_p)) & ~_PAGE_RW)) -#define mk_l3_writeable(_p) \ - (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) | _PAGE_RW)) -#define mk_l3_readonly(_p) \ - (*(_p) = mk_l3_pgentry(l3_pgentry_val(*(_p)) & ~_PAGE_RW)) -#define mk_l2_writeable(_p) \ - (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) | _PAGE_RW)) -#define mk_l2_readonly(_p) \ - (*(_p) = mk_l2_pgentry(l2_pgentry_val(*(_p)) & ~_PAGE_RW)) -#define mk_l1_writeable(_p) \ - (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) | _PAGE_RW)) -#define mk_l1_readonly(_p) \ - (*(_p) = mk_l1_pgentry(l1_pgentry_val(*(_p)) & ~_PAGE_RW)) - -/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. 
- Otherwise you risk miscompilation. */ -#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) -/* __pa_symbol should use for C visible symbols, but only for them. - This seems to be the official gcc blessed way to do such arithmetic. */ -#define __pa_symbol(x) \ - ({unsigned long v; \ - asm("" : "=r" (v) : "0" (x)); \ - v - __START_KERNEL_map; }) -#define __pa_maybe_symbol(x) \ - ({unsigned long v; \ - asm("" : "=r" (v) : "0" (x)); \ - __pa(v); }) -#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM -#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) -#define pfn_to_page(pfn) (frame_table + (pfn)) -#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) -#define VALID_PAGE(page) (((page) - frame_table) < max_mapnr) -#endif - -#ifndef __ASSEMBLY__ -static __inline__ int get_order(unsigned long size) -{ - int order; - - size = (size-1) >> (PAGE_SHIFT-1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} -#endif - -#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) - -#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS \ - ((current->thread.flags & THREAD_IA32) ? 
vm_data_default_flags32 : \ - vm_data_default_flags) -#define VM_STACK_FLAGS vm_stack_flags - -#endif /* _X86_64_PAGE_H */ diff --git a/xen/include/asm-x86_64/param.h b/xen/include/asm-x86_64/param.h deleted file mode 100644 index 601733b463..0000000000 --- a/xen/include/asm-x86_64/param.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _ASMx86_64_PARAM_H -#define _ASMx86_64_PARAM_H - -#ifndef HZ -#define HZ 100 -#endif - -#define EXEC_PAGESIZE 4096 - -#ifndef NGROUPS -#define NGROUPS 32 -#endif - -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - -#ifdef __KERNEL__ -# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ -#endif - -#endif diff --git a/xen/include/asm-x86_64/pci.h b/xen/include/asm-x86_64/pci.h deleted file mode 100644 index 988670995c..0000000000 --- a/xen/include/asm-x86_64/pci.h +++ /dev/null @@ -1,336 +0,0 @@ -#ifndef __x8664_PCI_H -#define __x8664_PCI_H - -#include -#include - - -/* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ - -#ifdef CONFIG_PCI -extern unsigned int pcibios_assign_all_busses(void); -#else -#define pcibios_assign_all_busses() 0 -#endif - -extern unsigned long pci_mem_start; -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM (pci_mem_start) - -void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq); -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - -#include -#include -#include -#include -#include - -struct pci_dev; -extern int force_mmu; - -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices, - * NULL for PCI-like buses (ISA, EISA). 
- * Returns non-NULL cpu-view pointer to the buffer if successful and - * sets *dma_addrp to the pci side dma address as well, else *dma_addrp - * is undefined. - */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle); - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. - * - * References to the memory and mappings associated with cpu_addr/dma_addr - * past this call are illegal. - */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -#ifdef CONFIG_GART_IOMMU - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. - */ -extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction); - - -void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr, - size_t size, int direction); - -/* - * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. 
identical - * to pci_map_single, but takes a struct pfn_info instead of a virtual address - */ - -#define pci_map_page(dev,page,offset,size,dir) \ - pci_map_single((dev), page_address(page)+(offset), (size), (dir)) - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ - dma_addr_t ADDR_NAME; -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ - __u32 LEN_NAME; -#define pci_unmap_addr(PTR, ADDR_NAME) \ - ((PTR)->ADDR_NAME) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ - (((PTR)->ADDR_NAME) = (VAL)) -#define pci_unmap_len(PTR, LEN_NAME) \ - ((PTR)->LEN_NAME) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ - (((PTR)->LEN_NAME) = (VAL)) - -static inline void pci_dma_sync_single(struct pci_dev *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); -} - -static inline void pci_dma_sync_sg(struct pci_dev *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); -} - -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - - -#else -static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, - size_t size, int direction) -{ - dma_addr_t addr; - - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - addr = virt_to_bus(ptr); - - /* - * This is gross, but what should I do. - * Unfortunately drivers do not test the return value of this. 
- */ - if ((addr+size) & ~hwdev->dma_mask) - out_of_line_bug(); - return addr; -} - -static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ -} - -static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page, - unsigned long offset, size_t size, int direction) -{ - dma_addr_t addr; - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - addr = (page - frame_table) * PAGE_SIZE + offset; - if ((addr+size) & ~hwdev->dma_mask) - out_of_line_bug(); - return addr; -} - -/* pci_unmap_{page,single} is a nop so... */ -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -#define BAD_DMA_ADDRESS (-1UL) - - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static inline void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg, - int nents, int dir) -{ - if (dir == PCI_DMA_NONE) - out_of_line_bug(); -} - - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. 
- * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - int i; - - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - - /* - * temporary 2.4 hack - */ - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; - void *addr = s->address; - if (addr) { - if (s->page || s->offset) - out_of_line_bug(); - } else if (s->page) - addr = page_address(s->page) + s->offset; -#if 0 - /* Invalid check, since address==0 is valid. */ - else - BUG(); -#endif - s->dma_address = pci_map_single(hwdev, addr, s->length, direction); - if (unlikely(s->dma_address == BAD_DMA_ADDRESS)) - goto error; - } - return nents; - - error: - pci_unmap_sg(hwdev, sg, i, direction); - return 0; -} - - -/* Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -static inline void pci_dma_sync_single(struct pci_dev *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. 
- */ -static inline void pci_dma_sync_sg(struct pci_dev *hwdev, - struct scatterlist *sg, - int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - out_of_line_bug(); - flush_write_buffers(); -} - -#define PCI_DMA_BUS_IS_PHYS 1 - -#endif - -#define pci_unmap_page pci_unmap_single - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if(mask < 0x00ffffff) - return 0; - - return 1; -} - -/* This is always fine. */ -#define pci_dac_dma_supported(pci_dev, mask) (1) - -static __inline__ dma64_addr_t -pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction) -{ - return ((dma64_addr_t) page_to_bus(page) + - (dma64_addr_t) offset); -} - -static __inline__ struct pfn_info * -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - unsigned long poff = (dma_addr >> PAGE_SHIFT); - return frame_table + poff; -} - -static __inline__ unsigned long -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return (dma_addr & ~PAGE_MASK); -} - -static __inline__ void -pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ - flush_write_buffers(); -} - -/* These macros should be used after a pci_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns. - */ -#define sg_dma_address(sg) ((sg)->dma_address) -#define sg_dma_len(sg) ((sg)->length) - -/* Return the index of the PCI controller for device. 
*/ -static inline int pci_controller_num(struct pci_dev *dev) -{ - return 0; -} - -#if 0 /* XXX Not in land of Xen XXX */ -#define HAVE_PCI_MMAP -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); -#endif - - -#endif /* __x8664_PCI_H */ diff --git a/xen/include/asm-x86_64/pda.h b/xen/include/asm-x86_64/pda.h deleted file mode 100644 index b9ca345ee4..0000000000 --- a/xen/include/asm-x86_64/pda.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef X86_64_PDA_H -#define X86_64_PDA_H - -#include - -/* Per processor datastructure. %gs points to it while the kernel runs */ -/* To use a new field with the *_pda macros it needs to be added to tools/offset.c */ -struct x8664_pda { - unsigned long kernelstack; /* TOS for current process */ - unsigned long oldrsp; /* user rsp for system call */ - unsigned long irqrsp; /* Old rsp for interrupts. */ - struct task_struct *pcurrent; /* Current process */ - int irqcount; /* Irq nesting counter. Starts with -1 */ - int cpunumber; /* Logical CPU number */ - /* XXX: could be a single list */ - unsigned long *pgd_quick; - unsigned long *pmd_quick; - unsigned long *pte_quick; - unsigned long pgtable_cache_sz; - char *irqstackptr; /* top of irqstack */ - unsigned long volatile *level4_pgt; -} ____cacheline_aligned; - -#define PDA_STACKOFFSET (5*8) - -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) - -extern struct x8664_pda cpu_pda[]; - -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. 
- */ -#define sizeof_field(type,field) (sizeof(((type *)0)->field)) -#define typeof_field(type,field) typeof(((type *)0)->field) - -extern void __bad_pda_field(void); -/* Don't use offsetof because it requires too much infrastructure */ -#define pda_offset(field) ((unsigned long)&((struct x8664_pda *)0)->field) - -#define pda_to_op(op,field,val) do { \ - switch (sizeof_field(struct x8664_pda, field)) { \ - case 2: asm volatile(op "w %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \ - case 4: asm volatile(op "l %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \ - case 8: asm volatile(op "q %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \ - default: __bad_pda_field(); \ - } \ - } while (0) - - -#define pda_from_op(op,field) ({ \ - typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \ - switch (sizeof_field(struct x8664_pda, field)) { \ - case 2: asm volatile(op "w %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \ - case 4: asm volatile(op "l %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \ - case 8: asm volatile(op "q %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \ - default: __bad_pda_field(); \ - } \ - ret__; }) - - -#define read_pda(field) pda_from_op("mov",field) -#define write_pda(field,val) pda_to_op("mov",field,val) -#define add_pda(field,val) pda_to_op("add",field,val) -#define sub_pda(field,val) pda_to_op("sub",field,val) - -#endif diff --git a/xen/include/asm-x86_64/pdb.h b/xen/include/asm-x86_64/pdb.h deleted file mode 100644 index 35b926eb17..0000000000 --- a/xen/include/asm-x86_64/pdb.h +++ /dev/null @@ -1,51 +0,0 @@ - -/* - * pervasive debugger - * - * alex ho - * 2004 - * university of cambridge computer laboratory - */ - - -#ifndef __PDB_H__ -#define __PDB_H__ - -#include -#include - -extern int pdb_initialized; -extern int pdb_com_port; -extern int pdb_high_bit; - -extern void initialize_pdb(void); - -/* Get/set 
values from generic debug interface. */ -extern int pdb_set_values(domid_t domain, u_char *buffer, - unsigned long addr, int length); -extern int pdb_get_values(domid_t domain, u_char *buffer, - unsigned long addr, int length); - -/* External entry points. */ -extern int pdb_handle_exception(int exceptionVector, - struct pt_regs *xen_regs); -extern int pdb_serial_input(u_char c, struct pt_regs *regs); -extern void pdb_do_debug(dom0_op_t *op); - -/* Breakpoints. */ -struct pdb_breakpoint -{ - struct list_head list; - unsigned long address; -}; -extern void pdb_bkpt_add (unsigned long address); -extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long address); -extern int pdb_bkpt_remove (unsigned long address); - -/* Conversions. */ -extern int hex (char); -extern char *mem2hex (char *, char *, int); -extern char *hex2mem (char *, char *, int); -extern int hexToInt (char **ptr, int *intValue); - -#endif /* __PDB_H__ */ diff --git a/xen/include/asm-x86_64/pgalloc.h b/xen/include/asm-x86_64/pgalloc.h deleted file mode 100644 index 559e33194f..0000000000 --- a/xen/include/asm-x86_64/pgalloc.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _X86_64_PGALLOC_H -#define _X86_64_PGALLOC_H - -#include -#include -#include -#include - -/* XXX probably should be moved to flushtlb.h */ - -/* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables - */ - -#ifndef CONFIG_SMP - -#define flush_tlb() __flush_tlb() -#define flush_tlb_all() __flush_tlb() -#define flush_tlb_all_pge() __flush_tlb_pge() -#define local_flush_tlb() __flush_tlb() -#define flush_tlb_cpu(_cpu) __flush_tlb() -#define flush_tlb_mask(_mask) __flush_tlb() -#define try_flush_tlb_mask(_mask) __flush_tlb() - -#else -#include - -extern int try_flush_tlb_mask(unsigned long mask); -extern void flush_tlb_mask(unsigned long mask); -extern void flush_tlb_all_pge(void); - -#define 
flush_tlb() __flush_tlb() -#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1) -#define local_flush_tlb() __flush_tlb() -#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu)) - -#endif - -#endif /* _X86_64_PGALLOC_H */ diff --git a/xen/include/asm-x86_64/processor.h b/xen/include/asm-x86_64/processor.h deleted file mode 100644 index ad3344cf3c..0000000000 --- a/xen/include/asm-x86_64/processor.h +++ /dev/null @@ -1,463 +0,0 @@ -/* - * include/asm-x86_64/processor.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -#ifndef __ASM_X86_64_PROCESSOR_H -#define __ASM_X86_64_PROCESSOR_H - -#include -#include -#include -#include -#include -#include - -struct task_struct; - -#define TF_MASK 0x00000100 -#define IF_MASK 0x00000200 -#define IOPL_MASK 0x00003000 -#define NT_MASK 0x00004000 -#define VM_MASK 0x00020000 -#define AC_MASK 0x00040000 -#define VIF_MASK 0x00080000 /* virtual interrupt flag */ -#define VIP_MASK 0x00100000 /* virtual interrupt pending */ -#define ID_MASK 0x00200000 - -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). - */ -#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; }) - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. 
[mj] - */ - -struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - __u32 x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this - call */ - int x86_clflush_size; - int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ - __u8 x86_virt_bits, x86_phys_bits; - __u32 x86_power; - unsigned long loops_per_jiffy; -} ____cacheline_aligned; - -#define X86_VENDOR_INTEL 0 -#define X86_VENDOR_CYRIX 1 -#define X86_VENDOR_AMD 2 -#define X86_VENDOR_UMC 3 -#define X86_VENDOR_NEXGEN 4 -#define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 -#define X86_VENDOR_TRANSMETA 7 -#define X86_VENDOR_UNKNOWN 0xff - -/* - * capabilities of CPUs - */ - -extern struct cpuinfo_x86 boot_cpu_data; -extern struct tss_struct init_tss[NR_CPUS]; - -#ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] -#else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data -#endif - -#define cpu_has_pge 1 -#define cpu_has_pse 1 -#define cpu_has_pae 1 -#define cpu_has_tsc 1 -#define cpu_has_de 1 -#define cpu_has_vme 1 -#define cpu_has_fxsr 1 -#define cpu_has_xmm 1 -#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) - -extern char ignore_irq13; - -extern void identify_cpu(struct cpuinfo_x86 *); -extern void print_cpu_info(struct cpuinfo_x86 *); -extern void dodgy_tsc(void); - -/* - * EFLAGS bits - */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ 
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* - * Generic CPUID function - * FIXME: This really belongs to msr.h - */ -extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* - * CPUID functions returning a single datum - */ -extern inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -extern inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -extern inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; -} -extern inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - - -/* - * Intel CPU flags in CR0 - */ -#define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */ -#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor (RW) */ -#define X86_CR0_EM 0x00000004 /* Require FPU Emulation (RO) */ -#define X86_CR0_TS 0x00000008 /* Task Switched (RW) */ -#define X86_CR0_NE 0x00000020 /* Numeric Error Reporting (RW) */ -#define X86_CR0_WP 0x00010000 /* Supervisor Write Protect (RW) */ 
-#define X86_CR0_AM 0x00040000 /* Alignment Checking (RW) */ -#define X86_CR0_NW 0x20000000 /* Not Write-Through (RW) */ -#define X86_CR0_CD 0x40000000 /* Cache Disable (RW) */ -#define X86_CR0_PG 0x80000000 /* Paging (RW) */ - -#define read_cr0() ({ \ - unsigned long __dummy; \ - __asm__( \ - "movq %%cr0,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) - -#define write_cr0(x) \ - __asm__("movq %0,%%cr0": :"r" (x)); - - - -/* - * Intel CPU features in CR4 - */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - -/* - * Save the cr4 feature set we're using (ie - * Pentium 4MB enable and PPro Global page - * enable), so that any CPU's that boot up - * after us can get the correct flags. 
- */ -extern unsigned long mmu_cr4_features; - -static inline void set_in_cr4 (unsigned long mask) -{ - mmu_cr4_features |= mask; - __asm__("movq %%cr4,%%rax\n\t" - "orq %0,%%rax\n\t" - "movq %%rax,%%cr4\n" - : : "irg" (mask) - :"ax"); -} - -static inline void clear_in_cr4 (unsigned long mask) -{ - mmu_cr4_features &= ~mask; - __asm__("movq %%cr4,%%rax\n\t" - "andq %0,%%rax\n\t" - "movq %%rax,%%cr4\n" - : : "irg" (~mask) - :"ax"); -} - -/* - * Cyrix CPU configuration register indexes - */ -#define CX86_CCR0 0xc0 -#define CX86_CCR1 0xc1 -#define CX86_CCR2 0xc2 -#define CX86_CCR3 0xc3 -#define CX86_CCR4 0xe8 -#define CX86_CCR5 0xe9 -#define CX86_CCR6 0xea -#define CX86_CCR7 0xeb -#define CX86_DIR0 0xfe -#define CX86_DIR1 0xff -#define CX86_ARR_BASE 0xc4 -#define CX86_RCR_BASE 0xdc - -/* - * Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - -/* - * Bus types - */ -#define EISA_bus 0 -#define MCA_bus 0 -#define MCA_bus__is_a_macro - - -/* - * User space process size: 512GB - 1GB (default). - */ -#define TASK_SIZE (0x0000007fc0000000) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_32 0xa0000000 -#define TASK_UNMAPPED_64 (TASK_SIZE/3) -#define TASK_UNMAPPED_BASE \ - ((current->thread.flags & THREAD_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64) - -/* - * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
- */ -#define IO_BITMAP_SIZE 32 -#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) -#define INVALID_IO_BITMAP_OFFSET 0x8000 - -struct i387_fxsave_struct { - u16 cwd; - u16 swd; - u16 twd; - u16 fop; - u64 rip; - u64 rdp; - u32 mxcsr; - u32 mxcsr_mask; - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ - u32 padding[24]; -} __attribute__ ((aligned (16))); - -union i387_union { - struct i387_fxsave_struct fxsave; -}; - -typedef struct { - unsigned long seg; -} mm_segment_t; - -struct tss_struct { - unsigned short back_link,__blh; -/* u32 reserved1; */ - u64 rsp0; - u64 rsp1; - u64 rsp2; - u64 reserved2; - u64 ist[7]; - u32 reserved3; - u32 reserved4; - u16 reserved5; - u16 io_map_base; - u32 io_bitmap[IO_BITMAP_SIZE]; -} __attribute__((packed)) ____cacheline_aligned; - -struct thread_struct { - unsigned long guestos_sp; - unsigned long guestos_ss; - unsigned long rip; - unsigned long rsp; - unsigned long userrsp; /* Copy from PDA */ - unsigned long fs; - unsigned long gs; - unsigned short es, ds, fsindex, gsindex; - enum { - THREAD_IA32 = 0x0001, - } flags; -/* Hardware debugging registers */ - unsigned long debugreg[8]; /* %%db0-7 debug registers */ -/* floating point info */ - union i387_union i387; -/* Trap info. */ - trap_info_t traps[256]; -}; - -#define IDT_ENTRIES 256 -extern struct gate_struct idt_table[]; -extern struct gate_struct *idt_tables[]; - -#define INIT_THREAD { \ - 0, 0, \ - 0, 0, 0, 0, \ - 0, 0, 0, 0, \ - 0, /* flags */ \ - { [0 ... 7] = 0 }, /* debugging registers */ \ - { { 0, }, }, /* 387 state */ \ - { {0} } /* io permissions */ \ -} - -#define INIT_TSS { \ - 0,0, /* back_link, __blh */ \ - 0, /* rsp0 */ \ - 0, 0, /* rsp1, rsp2 */ \ - 0, /* reserved */ \ - { [0 ... 
6] = 0 }, /* ist[] */ \ - 0,0, /* reserved */ \ - 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \ - {~0, } /* ioperm */ \ -} - -struct mm_struct { - /* - * Every domain has a L1 pagetable of its own. Per-domain mappings - * are put in this table (eg. the current GDT is mapped here). - */ - l1_pgentry_t *perdomain_pt; - pagetable_t pagetable; - /* Current LDT details. */ - unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt; - /* Next entry is passed to LGDT on domain switch. */ - char gdt[10]; -}; - -#define IDLE0_MM \ -{ \ - perdomain_pt: 0, \ - pagetable: mk_pagetable(__pa(idle_pg_table)) \ -} - -/* Convenient accessor for mm.gdt. */ -#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (_e)) -#define SET_GDT_ADDRESS(_p, _a) ((*(u64 *)((_p)->mm.gdt + 2)) = (_a)) -#define GET_GDT_ENTRIES(_p) ((*(u16 *)((_p)->mm.gdt + 0))) -#define GET_GDT_ADDRESS(_p) ((*(u64 *)((_p)->mm.gdt + 2))) - -long set_gdt(struct task_struct *p, - unsigned long *frames, - unsigned int entries); - -long set_debugreg(struct task_struct *p, int reg, unsigned long value); - -struct microcode { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int reserved[5]; - unsigned int bits[500]; -}; - -/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ -#define MICROCODE_IOCFREE _IO('6',0) - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. 
*/ -static inline void rep_nop(void) -{ - __asm__ __volatile__("rep;nop"); -} - -#define cpu_relax() rep_nop() - -#define init_task (init_task_union.task) -#define init_stack (init_task_union.stack) - -/* Avoid speculative execution by the CPU */ -extern inline void sync_core(void) -{ - int tmp; - asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); -} - -#define cpu_has_fpu 1 - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -#define prefetch(x) __builtin_prefetch((x),0) -#define prefetchw(x) __builtin_prefetch((x),1) -#define spin_lock_prefetch(x) prefetchw(x) -#define cpu_relax() rep_nop() - - -#endif /* __ASM_X86_64_PROCESSOR_H */ diff --git a/xen/include/asm-x86_64/ptrace.h b/xen/include/asm-x86_64/ptrace.h deleted file mode 100644 index da0419f429..0000000000 --- a/xen/include/asm-x86_64/ptrace.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef _X86_64_PTRACE_H -#define _X86_64_PTRACE_H - -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) -#define R15 0 -#define R14 8 -#define R13 16 -#define R12 24 -#define RBP 36 -#define RBX 40 -/* arguments: interrupts/non tracing syscalls only save upto here*/ -#define R11 48 -#define R10 56 -#define R9 64 -#define R8 72 -#define RAX 80 -#define RCX 88 -#define RDX 96 -#define RSI 104 -#define RDI 112 -#define ORIG_RAX 120 /* = ERROR */ -/* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. 
*/ -#define RIP 128 -#define CS 136 -#define EFLAGS 144 -#define RSP 152 -#define SS 160 -#define ARGOFFSET R11 -#endif /* __ASSEMBLY__ */ - -/* top of stack page */ -#define FRAME_SIZE 168 - -#define PTRACE_SETOPTIONS 21 - -/* options set using PTRACE_SETOPTIONS */ -#define PTRACE_O_TRACESYSGOOD 0x00000001 - -/* Dummy values for ptrace */ -#define FS 1000 -#define GS 1008 - -#ifndef __ASSEMBLY__ - -struct pt_regs { - unsigned long r15; - unsigned long r14; - unsigned long r13; - unsigned long r12; - unsigned long rbp; - unsigned long rbx; -/* arguments: non interrupts/non tracing syscalls only save upto here*/ - unsigned long r11; - unsigned long r10; - unsigned long r9; - unsigned long r8; - unsigned long rax; - unsigned long rcx; - unsigned long rdx; - unsigned long rsi; - unsigned long rdi; - unsigned long orig_rax; -/* end of arguments */ -/* cpu exception frame or undefined */ - unsigned long rip; - unsigned long cs; - unsigned long eflags; - unsigned long rsp; - unsigned long ss; -/* top of stack page */ -}; - -#endif - -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_GETFPXREGS 18 -#define PTRACE_SETFPXREGS 19 - -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) -#define user_mode(regs) (!!((regs)->cs & 3)) -#define instruction_pointer(regs) ((regs)->rip) -extern void show_regs(struct pt_regs *); - -enum { - EF_CF = 0x00000001, - EF_PF = 0x00000004, - EF_AF = 0x00000010, - EF_ZF = 0x00000040, - EF_SF = 0x00000080, - EF_TF = 0x00000100, - EF_IE = 0x00000200, - EF_DF = 0x00000400, - EF_OF = 0x00000800, - EF_IOPL = 0x00003000, - EF_IOPL_RING0 = 0x00000000, - EF_IOPL_RING1 = 0x00001000, - EF_IOPL_RING2 = 0x00002000, - EF_NT = 0x00004000, /* nested task */ - EF_RF = 0x00010000, /* resume */ - EF_VM = 0x00020000, /* virtual mode */ - EF_AC = 0x00040000, /* alignment */ - EF_VIF = 0x00080000, /* virtual interrupt */ - EF_VIP = 0x00100000, /* virtual interrupt pending */ - EF_ID = 0x00200000, /* id */ -}; - -#endif - -#endif diff --git a/xen/include/asm-x86_64/rwlock.h b/xen/include/asm-x86_64/rwlock.h deleted file mode 100644 index 8920e5829f..0000000000 --- a/xen/include/asm-x86_64/rwlock.h +++ /dev/null @@ -1,84 +0,0 @@ -/* include/asm-x86_64/rwlock.h - * - * Helpers used by both rw spinlocks and rw semaphores. - * - * Based in part on code from semaphore.h and - * spinlock.h Copyright 1996 Linus Torvalds. - * - * Copyright 1999 Red Hat, Inc. - * Copyright 2001,2002 SuSE labs - * - * Written by Benjamin LaHaise. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#ifndef _ASM_X86_64_RWLOCK_H -#define _ASM_X86_64_RWLOCK_H - -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __build_read_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $1,(%0)\n\t" \ - "js 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - ".previous" \ - ::"a" (rw) : "memory") - -#define __build_read_lock_const(rw, helper) \ - asm volatile(LOCK "subl $1,%0\n\t" \ - "js 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tpushq %%rax\n\t" \ - "leaq %0,%%rax\n\t" \ - "call " helper "\n\t" \ - "popq %%rax\n\t" \ - "jmp 1b\n" \ - ".previous" \ - :"=m" (*(volatile int *)rw) : : "memory") - -#define __build_read_lock(rw, helper) do { \ - if (__builtin_constant_p(rw)) \ - __build_read_lock_const(rw, helper); \ - else \ - __build_read_lock_ptr(rw, helper); \ - } while (0) - -#define __build_write_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - ".previous" \ - ::"a" (rw) : "memory") - -#define __build_write_lock_const(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tpushq %%rax\n\t" \ - "leaq %0,%%rax\n\t" \ - "call " helper "\n\t" \ - "popq %%rax\n\t" \ - "jmp 1b\n" \ - ".previous" \ - :"=m" (*(volatile int *)rw) : : "memory") - -#define __build_write_lock(rw, helper) do { \ - if (__builtin_constant_p(rw)) \ - __build_write_lock_const(rw, helper); \ - else \ - __build_write_lock_ptr(rw, helper); \ - } while (0) - -#endif diff --git a/xen/include/asm-x86_64/scatterlist.h b/xen/include/asm-x86_64/scatterlist.h deleted file mode 100644 index 1597d48eb0..0000000000 --- a/xen/include/asm-x86_64/scatterlist.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _X8664_SCATTERLIST_H -#define _X8664_SCATTERLIST_H - -struct scatterlist { - char * address; /* 
Location data is to be transferred to, NULL for - * highmem page */ - struct pfn_info * page; /* Location for highmem page, if any */ - unsigned int offset;/* for highmem, page offset */ - - dma_addr_t dma_address; - unsigned int length; -}; - -#define ISA_DMA_THRESHOLD (0x00ffffff) - -#endif /* !(_I386_SCATTERLIST_H) */ diff --git a/xen/include/asm-x86_64/smp.h b/xen/include/asm-x86_64/smp.h deleted file mode 100644 index bdc1b40e25..0000000000 --- a/xen/include/asm-x86_64/smp.h +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef __ASM_SMP_H -#define __ASM_SMP_H - -#include -#include - -#ifdef CONFIG_SMP -#ifndef ASSEMBLY -#include - -/* - * Private routines/data - */ - -extern void smp_alloc_memory(void); -extern unsigned long phys_cpu_present_map; -extern unsigned long cpu_online_map; -extern volatile unsigned long smp_invalidate_needed; -extern int pic_mode; -extern void smp_flush_tlb(void); -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); -extern void smp_invalidate_rcv(void); /* Process an NMI */ -extern void (*mtrr_hook) (void); -extern void zap_low_mappings (void); - -/* - * On x86 all CPUs are mapped 1:1 to the APIC space. - * This simplifies scheduling and IPI sending and - * compresses data structures. - */ -static inline int cpu_logical_map(int cpu) -{ - return cpu; -} -static inline int cpu_number_map(int cpu) -{ - return cpu; -} - -/* - * Some lowlevel functions might want to know about - * the real APIC ID <-> CPU # mapping. - */ -#define MAX_APICID 256 -extern volatile int cpu_to_physical_apicid[NR_CPUS]; -extern volatile int physical_apicid_to_cpu[MAX_APICID]; -extern volatile int cpu_to_logical_apicid[NR_CPUS]; -extern volatile int logical_apicid_to_cpu[MAX_APICID]; - -/* - * General functions that each host system must provide. - */ - -extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ - -/* - * This function is needed by all SMP systems. 
It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. - */ - -#define smp_processor_id() read_pda(cpunumber) - -#include -#include - -static __inline int hard_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(*(unsigned *)(APIC_BASE+APIC_ID)); -} - -extern int apic_disabled; -extern int slow_smp_processor_id(void); -#define safe_smp_processor_id() \ - (!apic_disabled ? hard_smp_processor_id() : slow_smp_processor_id()) - -#endif /* !ASSEMBLY */ - -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. 
- */ - -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ - - - -#endif -#define INT_DELIVERY_MODE 1 /* logical delivery */ -#define TARGET_CPUS 1 - -#ifndef CONFIG_SMP -#define safe_smp_processor_id() 0 -#endif -#endif diff --git a/xen/include/asm-x86_64/smpboot.h b/xen/include/asm-x86_64/smpboot.h deleted file mode 100644 index 4017902c69..0000000000 --- a/xen/include/asm-x86_64/smpboot.h +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef __ASM_SMPBOOT_H -#define __ASM_SMPBOOT_H - -/*emum for clustered_apic_mode values*/ -enum{ - CLUSTERED_APIC_NONE = 0, - CLUSTERED_APIC_XAPIC, - CLUSTERED_APIC_NUMAQ -}; - -#ifdef CONFIG_X86_CLUSTERED_APIC -extern unsigned int apic_broadcast_id; -extern unsigned char clustered_apic_mode; -extern unsigned char esr_disable; -extern unsigned char int_delivery_mode; -extern unsigned int int_dest_addr_mode; -extern int cyclone_setup(char*); - -static inline void detect_clustered_apic(char* oem, char* prod) -{ - /* - * Can't recognize Summit xAPICs at present, so use the OEM ID. 
- */ - if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){ - clustered_apic_mode = CLUSTERED_APIC_XAPIC; - apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; - int_dest_addr_mode = APIC_DEST_PHYSICAL; - int_delivery_mode = dest_Fixed; - esr_disable = 1; - /*Start cyclone clock*/ - cyclone_setup(0); - } - else if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "RUTHLESS SMP", 9)){ - clustered_apic_mode = CLUSTERED_APIC_XAPIC; - apic_broadcast_id = APIC_BROADCAST_ID_XAPIC; - int_dest_addr_mode = APIC_DEST_PHYSICAL; - int_delivery_mode = dest_Fixed; - esr_disable = 1; - /*Start cyclone clock*/ - cyclone_setup(0); - } - else if (!strncmp(oem, "IBM NUMA", 8)){ - clustered_apic_mode = CLUSTERED_APIC_NUMAQ; - apic_broadcast_id = APIC_BROADCAST_ID_APIC; - int_dest_addr_mode = APIC_DEST_LOGICAL; - int_delivery_mode = dest_LowestPrio; - esr_disable = 1; - } -} -#define INT_DEST_ADDR_MODE (int_dest_addr_mode) -#define INT_DELIVERY_MODE (int_delivery_mode) -#else /* CONFIG_X86_CLUSTERED_APIC */ -#define apic_broadcast_id (APIC_BROADCAST_ID_APIC) -#define clustered_apic_mode (CLUSTERED_APIC_NONE) -#define esr_disable (0) -#define detect_clustered_apic(x,y) -#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */ -#define INT_DELIVERY_MODE (dest_LowestPrio) -#endif /* CONFIG_X86_CLUSTERED_APIC */ -#define BAD_APICID 0xFFu - -#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467) -#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469) - -#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid) - -extern unsigned char raw_phys_apicid[NR_CPUS]; - -/* - * How to map from the cpu_present_map - */ -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (clustered_apic_mode == CLUSTERED_APIC_XAPIC) - return raw_phys_apicid[mps_cpu]; - if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) - return (mps_cpu/4)*16 + 
(1<<(mps_cpu%4)); - return mps_cpu; -} - -static inline unsigned long apicid_to_phys_cpu_present(int apicid) -{ - if(clustered_apic_mode) - return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3)); - return 1UL << apicid; -} - -#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) ) - -/* - * Mappings between logical cpu number and logical / physical apicid - * The first four macros are trivial, but it keeps the abstraction consistent - */ -extern volatile int logical_apicid_2_cpu[]; -extern volatile int cpu_2_logical_apicid[]; -extern volatile int physical_apicid_2_cpu[]; -extern volatile int cpu_2_physical_apicid[]; - -#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] -#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu] -#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] -#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu] -#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */ -#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] -#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu] -#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */ -#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] -#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu] -#endif /* CONFIG_MULTIQUAD */ - -#ifdef CONFIG_X86_CLUSTERED_APIC -static inline int target_cpus(void) -{ - static int cpu; - switch(clustered_apic_mode){ - case CLUSTERED_APIC_NUMAQ: - /* Broadcast intrs to local quad only. 
*/ - return APIC_BROADCAST_ID_APIC; - case CLUSTERED_APIC_XAPIC: - /*round robin the interrupts*/ - cpu = (cpu+1)%smp_num_cpus; - return cpu_to_physical_apicid(cpu); - default: - } - return cpu_online_map; -} -#else -#define target_cpus() (0xFF) -#endif -#endif diff --git a/xen/include/asm-x86_64/softirq.h b/xen/include/asm-x86_64/softirq.h deleted file mode 100644 index 292baac6ea..0000000000 --- a/xen/include/asm-x86_64/softirq.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __ASM_SOFTIRQ_H -#define __ASM_SOFTIRQ_H - -#include -#include - -#define cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) -#define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) - -#define local_bh_disable() cpu_bh_disable(smp_processor_id()) -#define local_bh_enable() cpu_bh_enable(smp_processor_id()) - -#define in_softirq() (local_bh_count(smp_processor_id()) != 0) - -#endif /* __ASM_SOFTIRQ_H */ diff --git a/xen/include/asm-x86_64/spinlock.h b/xen/include/asm-x86_64/spinlock.h deleted file mode 100644 index 50e7ffec43..0000000000 --- a/xen/include/asm-x86_64/spinlock.h +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef __ASM_SPINLOCK_H -#define __ASM_SPINLOCK_H - -#include -#include -#include -#include - -#if 0 -#define SPINLOCK_DEBUG 1 -#else -#define SPINLOCK_DEBUG 0 -#endif - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - */ - -typedef struct { - volatile unsigned int lock; -#if SPINLOCK_DEBUG - unsigned magic; -#endif -} spinlock_t; - -#define SPINLOCK_MAGIC 0xdead4ead - -#if SPINLOCK_DEBUG -#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC -#else -#define SPINLOCK_MAGIC_INIT /* */ -#endif - -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } - -#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) - -/* - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. 
- */ - -#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) -#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) - -#define spin_lock_string \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "js 2f\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\t" \ - "cmpb $0,%0\n\t" \ - "rep;nop\n\t" \ - "jle 2b\n\t" \ - "jmp 1b\n" \ - ".previous" - -/* - * This works. Despite all the confusion. - */ -#define spin_unlock_string \ - "movb $1,%0" - -static inline int spin_trylock(spinlock_t *lock) -{ - char oldval; - __asm__ __volatile__( - "xchgb %b0,%1" - :"=q" (oldval), "=m" (lock->lock) - :"0" (0) : "memory"); - return oldval > 0; -} - -static inline void spin_lock(spinlock_t *lock) -{ -#if SPINLOCK_DEBUG - __label__ here; -here: - if (lock->magic != SPINLOCK_MAGIC) { -printk("eip: %p\n", &&here); - BUG(); - } -#endif - __asm__ __volatile__( - spin_lock_string - :"=m" (lock->lock) : : "memory"); -} - -static inline void spin_unlock(spinlock_t *lock) -{ -#if SPINLOCK_DEBUG - if (lock->magic != SPINLOCK_MAGIC) - BUG(); - if (!spin_is_locked(lock)) - BUG(); -#endif - __asm__ __volatile__( - spin_unlock_string - :"=m" (lock->lock) : : "memory"); -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. 
- */ -typedef struct { - volatile unsigned int lock; -#if SPINLOCK_DEBUG - unsigned magic; -#endif -} rwlock_t; - -#define RWLOCK_MAGIC 0xdeaf1eed - -#if SPINLOCK_DEBUG -#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC -#else -#define RWLOCK_MAGIC_INIT /* */ -#endif - -#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } - -#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) - -/* - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. - * - * The inline assembly is non-obvious. Think about it. - * - * Changed to use the same technique as rw semaphores. See - * semaphore.h for details. -ben - */ -/* the spinlock helpers are in arch/x86_64/kernel/semaphore.S */ - -static inline void read_lock(rwlock_t *rw) -{ -#if SPINLOCK_DEBUG - if (rw->magic != RWLOCK_MAGIC) - BUG(); -#endif - __build_read_lock(rw, "__read_lock_failed"); -} - -static inline void write_lock(rwlock_t *rw) -{ -#if SPINLOCK_DEBUG - if (rw->magic != RWLOCK_MAGIC) - BUG(); -#endif - __build_write_lock(rw, "__write_lock_failed"); -} - -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") - -static inline int write_trylock(rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) - return 1; - atomic_add(RW_LOCK_BIAS, count); - return 0; -} - -#endif /* __ASM_SPINLOCK_H */ diff --git a/xen/include/asm-x86_64/string.h b/xen/include/asm-x86_64/string.h deleted file mode 100644 index 875e0e2747..0000000000 --- a/xen/include/asm-x86_64/string.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _X86_64_STRING_H_ -#define _X86_64_STRING_H_ - -#ifdef __KERNEL__ - -/* Written 2002 by Andi Kleen */ - -/* Only used for special circumstances. 
Stolen from i386/string.h */ -static inline void * __inline_memcpy(void * to, const void * from, size_t n) -{ -unsigned long d0, d1, d2; -__asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) - : "memory"); -return (to); -} - -/* Even with __builtin_ the compiler may decide to use the out of line - function. */ - -#define __HAVE_ARCH_MEMCPY 1 -extern void *__memcpy(void *to, const void *from, size_t len); -#define memcpy(dst,src,len) \ - ({ size_t __len = (len); \ - void *__ret; \ - if (__builtin_constant_p(len) && __len >= 64) \ - __ret = __memcpy((dst),(src),__len); \ - else \ - __ret = __builtin_memcpy((dst),(src),__len); \ - __ret; }) - - -#define __HAVE_ARCH_MEMSET -#define memset __builtin_memset - -#define __HAVE_ARCH_MEMMOVE -void * memmove(void * dest,const void *src,size_t count); - -#endif /* __KERNEL__ */ - -#endif diff --git a/xen/include/asm-x86_64/system.h b/xen/include/asm-x86_64/system.h deleted file mode 100644 index b6b6172381..0000000000 --- a/xen/include/asm-x86_64/system.h +++ /dev/null @@ -1,220 +0,0 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include -#include - -/* Clear and set 'TS' bit respectively */ -#define clts() __asm__ __volatile__ ("clts") -#define stts() write_cr0(X86_CR0_TS|read_cr0()) - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory"); - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - __asm__("lsll %1,%0" - :"=r" (__limit):"r" (segment)); - return __limit+1; -} - -#define nop() __asm__ __volatile__ ("nop") - -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) - -#define __xg(x) ((volatile long *)(x)) - -extern inline void set_64bit(volatile unsigned long *ptr, unsigned long val) -{ - *ptr = val; -} - -#define _set_64bit set_64bit - 
-/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %k0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 8: - __asm__ __volatile__("xchgq %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -#define __HAVE_ARCH_CMPXCHG 1 - -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) - - -/* - * This function causes longword _o to be changed to _n at location _p. 
- * If this access causes a fault then we return 1, otherwise we return 0. - * If no fault occurs then _o is updated to teh value we saw at _p. If this - * is the same as the initial value of _o then _n is written to location _p. - */ -#define cmpxchg_user(_p,_o,_n) \ -({ \ - int _rc; \ - __asm__ __volatile__ ( \ - "1: " LOCK_PREFIX "cmpxchgq %2,%3\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $1,%1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=a" (_o), "=r" (_rc) \ - : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \ - : "memory"); \ - _rc; \ -}) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#endif - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * For now, "wmb()" doesn't actually do anything, as all - * Intel CPU's follow what Intel calls a *Processor Order*, - * in which all writes are seen in the program order even - * outside the CPU. - * - * I expect future Intel CPU's to have a weaker ordering, - * but I'd also expect them to finally get their act together - * and add some real memory barriers if so. - */ -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence":::"memory") -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) - -/* interrupt control.. 
*/ -#define __save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) -#define __restore_flags(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") -#define __cli() __asm__ __volatile__("cli": : :"memory") -#define __sti() __asm__ __volatile__("sti": : :"memory") -/* used in the idle loop; sti takes one instruction cycle to complete */ -#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") - -/* For spinlocks etc */ -#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) -#define local_irq_set(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_set \n\t pushfq ; popq %0 ; sti":"=g" (x): /* no input */ :"memory"); } while (0) -#define local_irq_restore(x) __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory") -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - -#ifdef CONFIG_SMP - -extern void __global_cli(void); -extern void __global_sti(void); -extern unsigned long __global_save_flags(void); -extern void __global_restore_flags(unsigned long); -#define cli() __global_cli() -#define sti() __global_sti() -#define save_flags(x) ((x)=__global_save_flags()) -#define restore_flags(x) __global_restore_flags(x) - -#else - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) - -#endif - -/* Default simics "magic" breakpoint */ -#define icebp() asm volatile("xchg %%bx,%%bx" ::: "ebx") - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -void disable_hlt(void); -void enable_hlt(void); - -#endif diff --git a/xen/include/asm-x86_64/time.h b/xen/include/asm-x86_64/time.h deleted file mode 100644 index 
40145ddb0f..0000000000 --- a/xen/include/asm-x86_64/time.h +++ /dev/null @@ -1,23 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- - **************************************************************************** - * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge - **************************************************************************** - * - * File: time.h - * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) - * - * Environment: Xen Hypervisor - * Description: Architecture dependent definition of time variables - */ - -#ifndef _ASM_TIME_H_ -#define _ASM_TIME_H_ - -#include -#include - -typedef s64 s_time_t; /* system time */ - -extern int using_apic_timer; - -#endif /* _ASM_TIME_H_ */ diff --git a/xen/include/asm-x86_64/timex.h b/xen/include/asm-x86_64/timex.h deleted file mode 100644 index 7b6835763a..0000000000 --- a/xen/include/asm-x86_64/timex.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * linux/include/asm-x8664/timex.h - * - * x8664 architecture timex specifications - */ -#ifndef _ASMx8664_TIMEX_H -#define _ASMx8664_TIMEX_H - -#include -#include - -#define CLOCK_TICK_RATE (vxtime_hz) -#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ - 1000000 / CLOCK_TICK_RATE) << (SHIFT_SCALE - SHIFT_HZ)) / HZ) - -/* - * We only use the low 32 bits, and we'd simply better make sure - * that we reschedule before that wraps. Scheduling at least every - * four billion cycles just basically sounds like a good idea, - * regardless of how fast the machine is. 
- */ -typedef unsigned long long cycles_t; - -extern cycles_t cacheflush_time; - -static inline cycles_t get_cycles (void) -{ - unsigned long long ret; - rdtscll(ret); - return ret; -} - -extern unsigned int cpu_khz; - -/* - * Documentation on HPET can be found at: - * http://www.intel.com/ial/home/sp/pcmmspec.htm - * ftp://download.intel.com/ial/home/sp/mmts098.pdf - */ - -#define HPET_ID 0x000 -#define HPET_PERIOD 0x004 -#define HPET_CFG 0x010 -#define HPET_STATUS 0x020 -#define HPET_COUNTER 0x0f0 -#define HPET_T0_CFG 0x100 -#define HPET_T0_CMP 0x108 -#define HPET_T0_ROUTE 0x110 - -#define HPET_ID_VENDOR 0xffff0000 -#define HPET_ID_LEGSUP 0x00008000 -#define HPET_ID_NUMBER 0x00000f00 -#define HPET_ID_REV 0x000000ff - -#define HPET_CFG_ENABLE 0x001 -#define HPET_CFG_LEGACY 0x002 - -#define HPET_T0_ENABLE 0x004 -#define HPET_T0_PERIODIC 0x008 -#define HPET_T0_SETVAL 0x040 -#define HPET_T0_32BIT 0x100 - -/*extern struct vxtime_data vxtime; */ -extern unsigned long vxtime_hz; -extern unsigned long hpet_address; - -#endif diff --git a/xen/include/asm-x86_64/types.h b/xen/include/asm-x86_64/types.h deleted file mode 100644 index 25a78f28c6..0000000000 --- a/xen/include/asm-x86_64/types.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _X86_64_TYPES_H -#define _X86_64_TYPES_H - -typedef unsigned short umode_t; - -typedef unsigned long size_t; - -/* - * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the - * header files exported to user space - */ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long long __s64; -typedef unsigned long long __u64; - -#include - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -#define BITS_PER_LONG 64 - -typedef u64 dma64_addr_t; -typedef u64 dma_addr_t; - -#endif diff --git a/xen/include/asm-x86_64/uaccess.h b/xen/include/asm-x86_64/uaccess.h deleted file mode 100644 index 952e1b2f0a..0000000000 --- a/xen/include/asm-x86_64/uaccess.h +++ /dev/null @@ -1,314 +0,0 @@ -#ifndef __X86_64_UACCESS_H -#define __X86_64_UACCESS_H - -/* - * User space memory access functions - */ -#include -#include -#include -#include -#include - -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current->addr_limit) -#define set_fs(x) (current->addr_limit = (x)) - -#define segment_eq(a,b) ((a).seg == (b).seg) - -#define __addr_ok(addr) (!((unsigned long)(addr) & (current->addr_limit.seg))) - -/* - * Uhhuh, this needs 65-bit arithmetic. We have a carry.. 
- */ -#define __range_not_ok(addr,size) ({ \ - unsigned long flag,sum; \ - asm("# range_ok\n\r" \ - "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ - :"=&r" (flag), "=r" (sum) \ - :"1" (addr),"g" ((long)(size)),"g" (current->addr_limit.seg)); \ - flag; }) - -#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0) - -extern inline int verify_area(int type, const void * addr, unsigned long size) -{ - return access_ok(type,addr,size) ? 0 : -EFAULT; -} - - -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry -{ - unsigned long insn, fixup; -}; - - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the ugliness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). 
- */ - -extern void __get_user_1(void); -extern void __get_user_2(void); -extern void __get_user_4(void); -extern void __get_user_8(void); - -#define __get_user_x(size,ret,x,ptr) \ - __asm__ __volatile__("call __get_user_" #size \ - :"=a" (ret),"=d" (x) \ - :"0" (ptr) \ - :"rbx") - -/* Careful: we have to cast the result to the type of the pointer for sign reasons */ -#define get_user(x,ptr) \ -({ long __val_gu; \ - int __ret_gu=1; \ - switch(sizeof (*(ptr))) { \ -+ case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1);break; \ -+ case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2);break; \ -+ case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4);break; \ -+ case 8: __ret_gu=copy_from_user(&__val_gu,ptr,8);break; \ -+ default: __ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\ - /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break;*/ \ - /*default: __get_user_bad(); break;*/ \ - } \ - (x) = (__typeof__(*(ptr)))__val_gu; \ - __ret_gu; \ -}) - -extern void __put_user_1(void); -extern void __put_user_2(void); -extern void __put_user_4(void); -extern void __put_user_8(void); - -extern void __put_user_bad(void); - -#define __put_user_x(size,ret,x,ptr) \ - __asm__ __volatile__("call __put_user_" #size \ - :"=a" (ret) \ - :"0" (ptr),"d" (x) \ - :"rbx") - -#define put_user(x,ptr) \ - __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) - -#define __get_user(x,ptr) \ - __get_user_nocheck((x),(ptr),sizeof(*(ptr))) -#define __put_user(x,ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) - -#define __put_user_nocheck(x,ptr,size) \ -({ \ - int __pu_err; \ - __put_user_size((x),(ptr),(size),__pu_err); \ - __pu_err; \ -}) - - -#define __put_user_check(x,ptr,size) \ -({ \ - int __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ - if 
(access_ok(VERIFY_WRITE,__pu_addr,size)) \ - __put_user_size((x),__pu_addr,(size),__pu_err); \ - __pu_err; \ -}) - -#define __put_user_size(x,ptr,size,retval) \ -do { \ - retval = 0; \ - switch (size) { \ - case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\ - case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\ - case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\ - case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\ - default: __put_user_bad(); \ - } \ -} while (0) - -/* FIXME: this hack is definitely wrong -AK */ -struct __large_struct { unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct *)(x)) - -/* - * Tell gcc we read from memory instead of writing: this is because - * we do not write to any memory gcc knows about, so there are no - * aliasing issues. - */ -#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \ - __asm__ __volatile__( \ - "1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 1b,3b\n" \ - ".previous" \ - : "=r"(err) \ - : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err)) - - -#define __get_user_nocheck(x,ptr,size) \ -({ \ - int __gu_err; \ - long __gu_val; \ - __get_user_size(__gu_val,(ptr),(size),__gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ - __gu_err; \ -}) - -extern int __get_user_bad(void); - -#define __get_user_size(x,ptr,size,retval) \ -do { \ - retval = 0; \ - switch (size) { \ - case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\ - case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\ - case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\ - case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\ - default: (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \ - __asm__ __volatile__( \ - "1: 
mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 1b,3b\n" \ - ".previous" \ - : "=r"(err), ltype (x) \ - : "m"(__m(addr)), "i"(errno), "0"(err)) - -/* - * Copy To/From Userspace - */ - -/* Handles exceptions in both to and from, but doesn't do access_ok */ -extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); - -extern unsigned long copy_to_user(void *to, const void *from, unsigned len); -extern unsigned long copy_from_user(void *to, const void *from, unsigned len); - -static inline int __copy_from_user(void *dst, const void *src, unsigned size) -{ - if (!__builtin_constant_p(size)) - return copy_user_generic(dst,src,size); - int ret = 0; - switch (size) { - case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1); - return ret; - case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2); - return ret; - case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4); - return ret; - case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8); - return ret; - case 10: - __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); - if (ret) return ret; - __get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2); - return ret; - case 16: - __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); - if (ret) return ret; - __get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8); - return ret; - default: - return copy_user_generic(dst,src,size); - } -} - -static inline int __copy_to_user(void *dst, const void *src, unsigned size) -{ - if (!__builtin_constant_p(size)) - return copy_user_generic(dst,src,size); - int ret = 0; - switch (size) { - case 1:__put_user_asm(*(u8*)src,(u8 *)dst,ret,"b","b","iq",1); - return ret; - case 2:__put_user_asm(*(u16*)src,(u16*)dst,ret,"w","w","ir",2); - return ret; - case 
4:__put_user_asm(*(u32*)src,(u32*)dst,ret,"l","k","ir",4); - return ret; - case 8:__put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",8); - return ret; - case 10: - __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",10); - if (ret) return ret; - asm("":::"memory"); - __put_user_asm(4[(u16*)src],4+(u16*)dst,ret,"w","w","ir",2); - return ret; - case 16: - __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",16); - if (ret) return ret; - asm("":::"memory"); - __put_user_asm(1[(u64*)src],1+(u64*)dst,ret,"q","","ir",8); - return ret; - default: - return copy_user_generic(dst,src,size); - } -} - -long strncpy_from_user(char *dst, const char *src, long count); -long __strncpy_from_user(char *dst, const char *src, long count); -long strnlen_user(const char *str, long n); -long strlen_user(const char *str); -unsigned long clear_user(void *mem, unsigned long len); -unsigned long __clear_user(void *mem, unsigned long len); - -extern unsigned long search_exception_table(unsigned long); - -#endif /* __X86_64_UACCESS_H */ diff --git a/xen/include/asm-x86_64/unaligned.h b/xen/include/asm-x86_64/unaligned.h deleted file mode 100644 index d4bf78dc6f..0000000000 --- a/xen/include/asm-x86_64/unaligned.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __X8664_UNALIGNED_H -#define __X8664_UNALIGNED_H - -/* - * The x86-64 can do unaligned accesses itself. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. - */ - -/** - * get_unaligned - get value from possibly mis-aligned location - * @ptr: pointer to value - * - * This macro should be used for accessing values larger in size than - * single bytes at locations that are expected to be improperly aligned, - * e.g. retrieving a u16 value from a location not u16-aligned. - * - * Note that unaligned accesses can be very expensive on some architectures. 
- */ -#define get_unaligned(ptr) (*(ptr)) - -/** - * put_unaligned - put value to a possibly mis-aligned location - * @val: value to place - * @ptr: pointer to location - * - * This macro should be used for placing values larger in size than - * single bytes at locations that are expected to be improperly aligned, - * e.g. writing a u16 value to a location not u16-aligned. - * - * Note that unaligned accesses can be very expensive on some architectures. - */ -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif diff --git a/xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h b/xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h deleted file mode 100644 index 80055a5062..0000000000 --- a/xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h +++ /dev/null @@ -1,138 +0,0 @@ -/****************************************************************************** - * arch-i386/hypervisor-if.h - * - * Guest OS interface to x86 32-bit Xen. - */ - -#ifndef __HYPERVISOR_IF_I386_H__ -#define __HYPERVISOR_IF_I386_H__ - -/* - * Pointers and other address fields inside interface structures are padded to - * 64 bits. This means that field alignments aren't different between 32- and - * 64-bit architectures. - */ -/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */ -#define __MEMORY_PADDING(_X) u32 __pad_ ## _X -#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X) -#define MEMORY_PADDING _MEMORY_PADDING(__LINE__) - -/* - * SEGMENT DESCRIPTOR TABLES - */ -/* - * A number of GDT entries are reserved by Xen. These are not situated at the - * start of the GDT because some stupid OSes export hard-coded selector values - * in their ABI. These hard-coded values are always near the start of the GDT, - * so Xen places itself out of the way. - * - * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY - * and LAST_RESERVED_GDT_ENTRY are reserved). 
- */ -#define NR_RESERVED_GDT_ENTRIES 40 -#define FIRST_RESERVED_GDT_ENTRY 256 -#define LAST_RESERVED_GDT_ENTRY \ - (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1) - - -/* - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ -#define FLAT_RING1_CS 0x0819 /* GDT index 259 */ -#define FLAT_RING1_DS 0x0821 /* GDT index 260 */ -#define FLAT_RING3_CS 0x082b /* GDT index 261 */ -#define FLAT_RING3_DS 0x0833 /* GDT index 262 */ - -#define FLAT_GUESTOS_CS FLAT_RING1_CS -#define FLAT_GUESTOS_DS FLAT_RING1_DS -#define FLAT_USER_CS FLAT_RING3_CS -#define FLAT_USER_DS FLAT_RING3_DS - -/* And the trap vector is... */ -#define TRAP_INSTR "int $0x82" - - -/* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. - */ -#define HYPERVISOR_VIRT_START (0xFC000000UL) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif - -#ifndef __ASSEMBLY__ - -/* NB. Both the following are 32 bits each. */ -typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */ -typedef unsigned long cpureg_t; /* Full-sized register. */ - -/* - * Send an array of these to HYPERVISOR_set_trap_table() - */ -#define TI_GET_DPL(_ti) ((_ti)->flags & 3) -#define TI_GET_IF(_ti) ((_ti)->flags & 4) -#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) -#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) -typedef struct { - u8 vector; /* 0: exception vector */ - u8 flags; /* 1: 0-3: privilege level; 4: clear event enable? 
*/ - u16 cs; /* 2: code selector */ - memory_t address; /* 4: code address */ -} PACKED trap_info_t; /* 8 bytes */ - -typedef struct -{ - unsigned long ebx; - unsigned long ecx; - unsigned long edx; - unsigned long esi; - unsigned long edi; - unsigned long ebp; - unsigned long eax; - unsigned long ds; - unsigned long es; - unsigned long fs; - unsigned long gs; - unsigned long _unused; - unsigned long eip; - unsigned long cs; - unsigned long eflags; - unsigned long esp; - unsigned long ss; -} PACKED execution_context_t; - -typedef struct { - u32 tsc_bits; /* 0: 32 bits read from the CPU's TSC. */ - u32 tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */ -} PACKED tsc_timestamp_t; /* 8 bytes */ - -/* - * The following is all CPU context. Note that the i387_ctxt block is filled - * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. - */ -typedef struct { -#define ECF_I387_VALID (1<<0) - unsigned long flags; - execution_context_t cpu_ctxt; /* User-level CPU registers */ - char fpu_ctxt[256]; /* User-level FPU registers */ - trap_info_t trap_ctxt[256]; /* Virtual IDT */ - unsigned int fast_trap_idx; /* "Fast trap" vector offset */ - unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ - unsigned long guestos_ss, guestos_esp; /* Virtual TSS (only SS1/ESP1) */ - unsigned long pt_base; /* CR3 (pagetable base) */ - unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ - unsigned long event_callback_cs; /* CS:EIP of event callback */ - unsigned long event_callback_eip; - unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ - unsigned long failsafe_callback_eip; -} PACKED full_execution_context_t; - -#define ARCH_HAS_FAST_TRAP - -#endif - -#endif diff --git a/xen/include/hypervisor-ifs/arch-x86/hypervisor-if.h b/xen/include/hypervisor-ifs/arch-x86/hypervisor-if.h new file mode 100644 index 0000000000..80055a5062 --- /dev/null +++ 
b/xen/include/hypervisor-ifs/arch-x86/hypervisor-if.h @@ -0,0 +1,138 @@ +/****************************************************************************** + * arch-i386/hypervisor-if.h + * + * Guest OS interface to x86 32-bit Xen. + */ + +#ifndef __HYPERVISOR_IF_I386_H__ +#define __HYPERVISOR_IF_I386_H__ + +/* + * Pointers and other address fields inside interface structures are padded to + * 64 bits. This means that field alignments aren't different between 32- and + * 64-bit architectures. + */ +/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */ +#define __MEMORY_PADDING(_X) u32 __pad_ ## _X +#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X) +#define MEMORY_PADDING _MEMORY_PADDING(__LINE__) + +/* + * SEGMENT DESCRIPTOR TABLES + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way. + * + * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY + * and LAST_RESERVED_GDT_ENTRY are reserved). + */ +#define NR_RESERVED_GDT_ENTRIES 40 +#define FIRST_RESERVED_GDT_ENTRY 256 +#define LAST_RESERVED_GDT_ENTRY \ + (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1) + + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0x0819 /* GDT index 259 */ +#define FLAT_RING1_DS 0x0821 /* GDT index 260 */ +#define FLAT_RING3_CS 0x082b /* GDT index 261 */ +#define FLAT_RING3_DS 0x0833 /* GDT index 262 */ + +#define FLAT_GUESTOS_CS FLAT_RING1_CS +#define FLAT_GUESTOS_DS FLAT_RING1_DS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS + +/* And the trap vector is... 
*/ +#define TRAP_INSTR "int $0x82" + + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#define HYPERVISOR_VIRT_START (0xFC000000UL) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +#ifndef __ASSEMBLY__ + +/* NB. Both the following are 32 bits each. */ +typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */ +typedef unsigned long cpureg_t; /* Full-sized register. */ + +/* + * Send an array of these to HYPERVISOR_set_trap_table() + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) +typedef struct { + u8 vector; /* 0: exception vector */ + u8 flags; /* 1: 0-3: privilege level; 4: clear event enable? */ + u16 cs; /* 2: code selector */ + memory_t address; /* 4: code address */ +} PACKED trap_info_t; /* 8 bytes */ + +typedef struct +{ + unsigned long ebx; + unsigned long ecx; + unsigned long edx; + unsigned long esi; + unsigned long edi; + unsigned long ebp; + unsigned long eax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long _unused; + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +} PACKED execution_context_t; + +typedef struct { + u32 tsc_bits; /* 0: 32 bits read from the CPU's TSC. */ + u32 tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */ +} PACKED tsc_timestamp_t; /* 8 bytes */ + +/* + * The following is all CPU context. Note that the i387_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. 
+ */ +typedef struct { +#define ECF_I387_VALID (1<<0) + unsigned long flags; + execution_context_t cpu_ctxt; /* User-level CPU registers */ + char fpu_ctxt[256]; /* User-level FPU registers */ + trap_info_t trap_ctxt[256]; /* Virtual IDT */ + unsigned int fast_trap_idx; /* "Fast trap" vector offset */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long guestos_ss, guestos_esp; /* Virtual TSS (only SS1/ESP1) */ + unsigned long pt_base; /* CR3 (pagetable base) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +} PACKED full_execution_context_t; + +#define ARCH_HAS_FAST_TRAP + +#endif + +#endif diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h index 5202c60d4c..387af5865c 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include